gen.div <- read.delim(paste0(folder_path, 'genetic_diversity/GenDiv_subsample5Ind.txt')) %>%
filter(EstPi > 0, !outPi %in% 'out', !outTheta %in% 'out')
##load phylogenies and speciation rates from 100 posterior trees + MCC tree
TreeRatesSet <- readRDS(paste0(folder_path,'speciation_rate/output/MCCposterior100_treeMAPS.rds'))
treeMCC <- TreeRatesSet[["treeMCC"]][["tree"]]
ratesMCC <- TreeRatesSet[["treeMCC"]][["rates"]][!is.na(names(TreeRatesSet[["treeMCC"]][["rates"]]))][-c(1:4)] ##same as MAPS[5:(nedges+4)]
parClaDS <- tibble(rates = purrr::map(TreeRatesSet,'rates')) %>%
hoist(rates,
sigma = "sigma",
alpha = "alpha",
epsilon = "epsilon",
lambda_0 = "lambda_0") %>%
mutate(set = names(TreeRatesSet)) %>%
select(-rates)
# load(paste0(folder_path,'speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata')) #loads TreeSet
tipLengths <- cbind.data.frame(edge = treeMCC$edge,
time = treeMCC$edge.length) %>%
filter(edge.2 <= Ntip(treeMCC)) %>%
mutate(species = treeMCC$tip.label,
nodes_sister = ifelse(edge.1 %in% edge.1[duplicated(edge.1)], "sister","nonSister"))
clades <- read.delim(paste0(folder_path,'speciation_rate/input/MamPhy_5911sp_tipGenFamOrdCladeGenesSampPC.txt')) %>%
drop_na(PC) %>%
mutate(species = word(tiplabel, 1,2, sep = "_"),
clades = sub("^PC\\d+_", "", PC)) %>%
select(species, clades, ord, fam) %>%
filter(species %in% treeMCC$tip.labels)
changeClades <- select(clades, clades) %>%
distinct() %>%
arrange(clades) %>%
filter(clades %in% c("Cetartiodactyla", "EmballonuridRelated",
"GuineaPigRelated","Marsupials", "PhyllostomidRelated",
"SquirrelRelated","VespertilionidRelated")) %>%
mutate(newNames = c("Artiodactyla", "Emballonuroidea",
"Guinea Pig-related","Marsupialia", "Noctilionoidea",
"Squirrel-related", "Vespertilionoidea"))
Nodes <- read.delim(paste0(folder_path,'speciation_rate/Upham_FBD_clades_nodes.txt'))
### to include the clades that had fewer species as an arc in the
for(clade in unique(clades$clades)[!unique(clades$clades) %in% Nodes$clades]){
a <- as.character(clades[clades$clades %in% clade,'species'])
cladeNode <- ape::getMRCA(treeMCC, a)
if(!is.null(cladeNode)){
Nodes <- bind_rows(Nodes, data.frame(clades = clade, node = cladeNode))
}
}
spRate <- read.delim(paste0(folder_path,'speciation_rate/output/MCCposterior100_tipRate.txt')) %>%
rename(set = treeN ) %>%
left_join(., clades, by = 'species')
gendivSpRate <- spRate %>%
left_join(., select(gen.div, species, EstPi, subPi_mean, EstTheta, subTheta_mean), by = 'species')
traitData <- read.delim(paste0(folder_path,'traits/matchedTraits.txt'), stringsAsFactors = FALSE) %>%
select(-Family) %>%
mutate(mean_temp = mean_temp + abs(min(mean_temp, na.rm = T)) + 1)
gendivSpRateTrait <- gendivSpRate %>%
left_join(., traitData, by = 'species')
MutRateAll <- readRDS(paste0(folder_path, 'mutationRate/output/mutationRate_cytb3rdcodonPAML.rds'))
gendivSpRateMutRate <- gendivSpRateTrait %>%
left_join(., MutRateAll, by = c('set','species')) %>%
select(-mutrate) %>% ## mutation rate per Mya and not per year
mutate(timeyear = time * 1000000,
GenerationLength_y = GenerationLength_d/365,
timeGeneration = timeyear/GenerationLength_y,
mutRate = expNsub / timeGeneration,
Ne = EstPi / mutRate) %>%
arrange(set)
### Load PGLS results
PGLSglobal0 <- readRDS(paste0(folder_path,'pgls/global/global_gendivSpRate_PGLSresults.rds'))
PGLSglobalsub <- readRDS(paste0(folder_path,'pgls/global/global_gendivSpRate_subsample6Ind_PGLSresults.rds'))
PGLSglobal <- bind_rows(PGLSglobal0, PGLSglobalsub) %>%
rename(Term = term)
PGLSclade0 <- readRDS(paste0(folder_path,'pgls/clade/clade_gendivSpRate_PGLSresults.rds'))
PGLScladesub <- readRDS(paste0(folder_path,'pgls/clade/clade_gendivSpRate_subsample6Ind_PGLSresults.rds'))
PGLSclade <- bind_rows(PGLSclade0, PGLScladesub) %>%
rename(Term = term)
PGLSglobal_traits <- readRDS(paste0(folder_path,'pgls/global_traits/global_gendivSpRateTraits_PGLSresults.rds')) %>%
rename(Term = term)
PGLSglobal_mutRate <- readRDS(paste0(folder_path,'pgls/global_mutRate/global_gendivSpRateMutRate_PGLSresults.rds')) %>%
rename(Term = term)
### Load BMLM results
BMLMglobal0 <- readRDS(paste0(folder_path,'bmlm/global/global_allPosterior.rds'))
BMLMglobalsub <- readRDS(paste0(folder_path,'bmlm/global/global_allPosterior_subsample6Ind.rds'))
BMLMglobal <- c(BMLMglobal0, BMLMglobalsub)
BMLMglobal_traits <- readRDS(paste0(folder_path,'bmlm/global_traits/global_traits_allPosterior.rds'))
BMLMglobal_mutRate <- readRDS(paste0(folder_path,'bmlm/global_mutRate/global_mutRate_allPosterior.rds'))
colSet <- data.frame(clades = unique(PGLSclade$clade),
col = iwanthue(length(unique(PGLSclade$clade))))
cladesSum <- data.frame(table(clades$clades),
stringsAsFactors = F) %>%
rename(clades = Var1) %>%
left_join(colSet) ### code from Odile plot_treePi.R prepare the data ####
tipRatesMCC <- spRate %>%
filter(set %in% "treeMCC") %>%
pull(tipRate)
sub_tree = treeMCC
sub_rates = ratesMCC
rep = map_rates_tipNroot(sub_tree, sub_rates = sub_rates, treeMCC, rates = rep(NaN,
nrow(treeMCC$edge)))
new_rates = rep$rates
# clades_root = as.numeric(c(Nodes$node))
clades_root = as.numeric(c(Nodes[Nodes$clades %in% unique(PGLSclade$clade), "node"]))
clades_tips = list()
clades_edges = list()
for (i in unique(Nodes$clades)) {
# unique(Nodes[Nodes$clades %in% unique(PGLSclade$clade),'clades'])
clades_tips[[i]] = getDescendants(sub_tree, Nodes[Nodes$clades %in% i, "node"])
}
names(clades_tips)[names(clades_tips) %in% changeClades$clades] <- na.omit(changeClades[match(names(clades_tips),
changeClades$clades), 2])
pglsSet <- unique(PGLSclade$clade)
pglsSet[pglsSet %in% changeClades$clades] <- na.omit(changeClades[match(pglsSet,
changeClades$clades), 2])
#### a few options ####
save_pdf = F
v1_name = "EstPi"
col1_nbins = 100
col1_offset = 20
col1_pal = "YlOrRd"
D = 0.6
L1 = 0.7
v2_name = "EstTheta"
col2_nbins = 100
col2_offset = 5
col2_pal = "YlGnBu"
L2 = 0.7
vertical = 1
#### start pdf ####
if (vertical == 1) {
if (save_pdf)
pdf(paste0(fig_path, "Figure1.pdf"), width = 18, height = 14)
# layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
layout(matrix(c(1, 2, 1, 3), ncol = 2, byrow = T), widths = c(3, 1))
} else if (vertical == 2) {
if (save_pdf)
pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 15)
# layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
layout(matrix(c(1, 1, 3, 2), ncol = 2, byrow = T), heights = c(3, 1))
} else if (vertical == 3) {
if (save_pdf)
pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 14)
# layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
layout(matrix(c(1, 1, 2, 3), ncol = 2, byrow = T), heights = c(3, 0.6))
} else if (vertical == 4) {
if (save_pdf)
pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 15)
# layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
layout(matrix(c(2, 1, 3), ncol = 1, byrow = T), heights = c(0.6, 3, 0.6))
} else if (vertical == 5) {
if (save_pdf)
pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 15)
# layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
layout(matrix(c(4, 2, 1, 1, 3, 5), ncol = 2, byrow = T), heights = c(0.6, 3,
0.6))
}
#### plot the tree #####
par(mar = c(2, 1, 2, 0))
plot_tree = treeMCC
plot_tree$tip.label[] = ""
plot_tree$tip.label[1] = "........" # to make the tree smaller, long invisible tip label was added
tipcol = rgb(255, 255, 255, alpha = 255, maxColorValue = 255)
col_tree = invisible(plot.with.rate.withNaNs(plot_tree, c(new_rates), NaN_color = "gray90",
leg = F, same.scale = T, lwd = 2, log = T, show.tip.label = T, tip.color = tipcol))
#### collect tips coordinates ####
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
tip <- 1:lastPP$Ntip
XX <- lastPP$xx #[tip]
YY <- lastPP$yy #[tip]
#### chose tip lines colors ####
col_v1 = c(colorRampPalette((brewer.pal(n = 8, name = col1_pal)))(col1_offset + col1_nbins)) # color palette
v1 = c()
for (i in treeMCC$tip.label) {
if (i %in% gen.div$species) {
v1 = c(v1, as.numeric(gen.div[gen.div$species == i, v1_name]))
} else {
v1 = c(v1, NA)
}
}
v1_transformed = log(v1 + 0.005)
range_v1 = range(v1_transformed, na.rm = T)
lab_v1 = c(0, 0.01, 0.02, 0.05, 0.1)
at_v1 = log(lab_v1 + 0.005)
col1 = col_v1[round(((v1_transformed - range_v1[1])/(range_v1[2] - range_v1[1])) *
(col1_nbins - 1)) + col1_offset + 1]
#### add them to the tree ####
line_position = D + c(0, L1)
total_depth = sqrt(XX[1]^2 + YY[1]^2)
add2depth = 0.05 * total_depth
for (i in 1:length(treeMCC$tip.label)) {
if (!is.na(col1[i])) {
addx = add2depth * line_position * XX[i]/total_depth
addy = add2depth * line_position * YY[i]/total_depth
lines(XX[i] + addx, YY[i] + addy, col = col1[i], lwd = 3, xpd = T)
}
}
#### chose tip lines colors / 2nd measure ####
col_v2 = c("#FFFFFF", colorRampPalette((brewer.pal(n = 8, name = col2_pal)))(col2_offset +
col2_nbins)) # color palette
v2 = c()
for (i in treeMCC$tip.label) {
if (i %in% gen.div$species) {
v2 = c(v2, as.numeric(gen.div[gen.div$species == i, v2_name]))
} else {
v2 = c(v2, NA)
}
}
v2_transformed = log(v2 + 0.005)
range_v2 = range(v2_transformed, na.rm = T)
lab_v2 = c(0, 0.01, 0.02, 0.05, 0.1)
at_v2 = log(lab_v2 + 0.005)
col2 = col_v2[round(((v2_transformed - range_v2[1])/(range_v2[2] - range_v2[1])) *
(col2_nbins - 1)) + col2_offset + 1] #### add them to the tree
line_position = D + L1 + 0.05 + c(0, L2)
total_depth = sqrt(XX[1]^2 + YY[1]^2)
add2depth = 0.05 * total_depth
for (i in 1:length(treeMCC$tip.label)) {
if (!is.na(col2[i])) {
addx = add2depth * line_position * XX[i]/total_depth
addy = add2depth * line_position * YY[i]/total_depth
lines(XX[i] + addx, YY[i] + addy, col = col2[i], lwd = 3, xpd = T)
}
}
# if (save_pdf) dev.off() legends #### image.plot(z = range_v1,col =
# col_v1[-col1_offset-1], horizontal=F,legend.only = T, legend.mar = 12,
# legend.shrink\t= 0.3,axis.args=list( at=at_v1, labels=lab_v1))#,)
# image.plot(z = range_v2,col = col_v2[-col2_offset-1],
# horizontal=F,legend.only = T, legend.mar = 6, legend.shrink\t=
# 0.3,axis.args=list( at=at_v2, labels=lab_v2))#,axis.args=list( at=log(ticks),
# labels=ticks))#### dots at roots
### plot nodes of PGLS clades
for (i in clades_root) {
points(XX[i], YY[i], col = "orangered4", bg = "orangered3", pch = 21, cex = 0.8)
}
#### circular arcs for analysed clades ####
l = D + 0.05 + L1 + L2 + D
nCT = length(clades_tips)
# nCT = length(unique(PGLSclade$clade))
clades_names = unique(names(clades_tips))
radius = total_depth + add2depth * (l + D)
add2text = 0.9
further = rep(0, nCT)
further[which(clades_names %in% c("Lagomorpha", "Squirrel-related"))] = add2text
set.seed(1)
# grays = paste0('gray', sample(20:80,nCT)) grays[1] = 'red'
grays = Nodes[, c("clades", "node")] %>%
# filter(clades %in% unique(PGLSclade$clade)) %>%
mutate(ord = seq(1:nCT)) %>%
arrange(desc(node)) %>%
left_join(., cladesSum, by = "clades") %>%
# mutate(col = rep(paste0('gray',c('20','80')),nCT)[1:nCT]) %>%
arrange(ord) %>%
pull(col)
for (i in 1:nCT) {
tips = sort(clades_tips[[i]][clades_tips[[i]] <= (sub_tree$Nnode + 1)])
lt = length(tips)
tips = tips[-((lt - 2):lt)] ###why is not working for every clade with more than one species?
tips = tips[-(1:2)]
xs = XX[tips]
ys = YY[tips]
xs = xs + add2depth * l * xs/total_depth
ys = ys + add2depth * l * ys/total_depth
colGray <- ifelse(as.character(clades_names[i]) %in% pglsSet, grays[i], "gray90")
lines(xs, ys, lwd = 3, col = colGray)
lt = length(tips)
if (lt > 1) {
cl = T
angle_i = acos(xs[floor(lt/2)]/(total_depth + add2depth * l))
if (ys[floor(lt/2)] < 0) {
angle_i = -1 * angle_i
cl = F
}
if (as.character(clades_names[i]) %in% pglsSet) {
arctext(as.character(clades_names[i]), radius = radius + further[i] *
add2depth, middle = angle_i, col = grays[i], clockwise = cl, cex = 1.5,
xpd = T)
}
# arctext(as.character(clades_names[i]), radius = radius +
# further[i]*add2depth, middle = angle_i, col = 'gray20', clockwise =
# cl)
}
}
#### first densplot ####
clades_tips2 <- clades_tips[names(clades_tips) %in% pglsSet]
rangeX = range(XX)
rangeY = range(YY)
relative_width = 0.4
relative_heigth = 0.35
relative_rangeX = relative_width * rangeX
relative_rangeY = relative_width * rangeY
xrel = function(x, rrX = relative_rangeX, from = 0, to = 1) {
newx = from + (to - from) * (x - min(x))/diff(range(x))
newx = newx * diff(rrX) + min(rrX)
return(newx)
}
yrel = function(x, rrX = relative_rangeY, from = 0, to = 1) {
newx = from + (to - from) * (x - min(x))/diff(range(x))
newx = newx * diff(rrX) + min(rrX)
return(newx)
}
polygon(relative_rangeX[c(1, 1, 2, 2)], relative_rangeY[c(1, 2, 2, 1)], border = NA,
col = adjustcolor("white", alpha.f = 0.8))
# plot the density
lower_y = 0.25
densclads = lapply(clades_tips2, function(vect) {
density(log(ratesMCC[sapply(vect, function(x) {
which(treeMCC$edge[, 2] == x)
})]))
})
dens_all = density(log(tipRatesMCC)) #new_rates for all rates and tipRatesMCC for just tip rates
maxy = max(dens_all$y)
for (i in 1:length(densclads)) {
maxy = max(max(densclads[[i]]$y), maxy)
}
newlower_y = yrel(c(0, lower_y, 1))[2]
for (i in 1:length(densclads)) {
lines(xrel(c(range(dens_all$x), densclads[[i]]$x))[-c(1:2)], yrel(c(maxy, densclads[[i]]$y),
from = lower_y)[-1], lwd = 2, col = grays[i])
}
polygon(xrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], c(newlower_y,
yrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), border = NA, col = adjustcolor("gray80",
alpha.f = 0.5))
lines(xrel(dens_all$x), yrel(c(maxy, dens_all$y), from = lower_y)[-1], lwd = 3)
# ad the color legend
axis_y = 0.15
col_x = xrel(c(range(diff(log(ratesMCC))), min(dens_all$x) + c(diff(range(dens_all$x)) *
(0:length(col_tree$col))/length(col_tree$col))))[-c(1, 2)]
col_y = yrel(c(0, 1), from = axis_y + 0.005, to = lower_y - 0.05)
for (i in 1:length(col_tree$col)) {
polygon(col_x[c(i, i + 1)][c(1, 1, 2, 2)], col_y[c(1, 2, 2, 1)], border = col_tree$col[i],
col = col_tree$col[i])
}
# add the axis
lines(relative_rangeX, yrel(c(0, 0, 1), from = axis_y)[1:2], lwd = 2)
lab = c(0.03, 0.05, 0.1, 0.2, 0.5, 1)
ticks = xrel(sort(c(range(dens_all$x), log(lab))))
text(0, relative_rangeY[1], expression(Speciation ~ rates ~ "(" ~ Myr^"-1" ~ ")"))
for (i in 2:(1 + length(lab))) {
lines(ticks[c(i, i)], yrel(c(0, 1), from = axis_y - 0.015, to = axis_y + 0.015),
lwd = 1.5)
text(labels = lab[i - 1], x = ticks[i], y = yrel(c(0, 1), from = axis_y - 0.06,
to = axis_y + 0.015)[1])
}
#### densities for Pi ####
par(mar = c(2, 0, 2, 1))
plot(1000, xlim = c(-1, 1), ylim = c(-1, 1), axes = F, xlab = "", ylab = "")
lower_y = 0.3
#### First measure ####
relative_rangeX = c(0, 1)
relative_rangeY = c(-1, 1) * 0.9
dens_all1 = density(v1_transformed, na.rm = T)
dens_all2 = density(v2_transformed, na.rm = T)
ry = range(c(dens_all1$x, dens_all2$x, -6.5, -1))
xrel = function(x, rrX = relative_rangeX, from = 0, to = 1) {
newx = from + (to - from) * (x - min(x))/diff(range(x))
newx = newx * diff(rrX) + min(rrX)
return(newx)
}
yrel = function(x, rrX = relative_rangeY, from = 0, to = 1, My = ry[2], my = ry[1]) {
newx = from + (to - from) * (x - my)/(My - my)
newx = newx * diff(rrX) + min(rrX)
return(newx)
}
dens_all = density(v1_transformed, na.rm = T)
# plot the density
densclads = lapply(clades_tips2, function(vect) {
density(v1_transformed[vect[vect < 4065]], na.rm = T)
})
maxy = max(dens_all$y)
for (i in 1:length(densclads)) {
maxy = max(max(densclads[[i]]$y), maxy)
}
newlower_y = xrel(c(0, lower_y, 1))[2]
for (i in 1:length(densclads)) {
lines(y = yrel(c(range(dens_all$x), densclads[[i]]$x))[-c(1:2)], x = -xrel(c(maxy,
densclads[[i]]$y), from = lower_y)[-1], lwd = 2, col = grays[i])
}
polygon(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = -c(newlower_y,
xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), border = NA, col = adjustcolor("gray80",
alpha.f = 0.5))
lines(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = -c(newlower_y,
xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), lwd = 3)
# ad the color legend
axis_y = 0.15
#
c1 = col_v1
# rc1 = range(round(( (c(v1_transformed) - range_v1[1]) /
# (range_v1[2]-range_v1[1]))*(col1_nbins-1) )+col1_offset+1, na.rm = T)
col_x = yrel(range_v1[1] + diff(range_v1) * (0:length(c1))/length(c1))
# col_x = col_x[rc1[1]:min(length(c1),rc1[2])] c1 =
# c1[rc1[1]:min(length(c1),rc1[2])]
col_y = xrel(c(0, 1), from = axis_y + 0.005, to = lower_y - 0.05)
for (i in 1:length(c1)) {
polygon(y = col_x[c(i, i + 1)][c(1, 1, 2, 2)], x = -col_y[c(1, 2, 2, 1)], border = c1[i],
col = c1[i])
}
polygon(y = c(relative_rangeY[1], col_x[1])[c(1, 1, 2, 2)], x = -col_y[c(1, 2, 2,
1)], border = c1[1], col = c1[1])
lx = length(c1)
polygon(y = c(relative_rangeY[2], col_x[lx])[c(1, 1, 2, 2)], x = -col_y[c(1, 2, 2,
1)], border = c1[lx], col = c1[lx])
# # add the axis lines(relative_rangeX, yrel(c(0,0,1), from = axis_y)[1:2], lwd
# = 2) lab = c(0.03,0.05,0.1,0.2,0.5,1) ticks = xrel(sort(c(range(dens_all$x),
# log(lab))))
# text(0,relative_rangeY[1],expression(speciation~rates~'('~My^'-1'~')')) for
# (i in 2:(1+length(lab))){ lines(ticks[c(i,i)], yrel(c(0,1), from =
# axis_y-0.015, to = axis_y + 0.015), lwd = 1.5) text(labels = lab[i-1], x =
# ticks[i], y = yrel(c(0,1), from = axis_y-0.06, to = axis_y + 0.015)[1]) }
#### Second measure ####
relative_rangeX = c(0, 1)
xrel = function(x, rrX = relative_rangeX, from = 0, to = 1) {
newx = from + (to - from) * (x - min(x))/diff(range(x))
newx = newx * diff(rrX) + min(rrX)
return(newx)
}
yrel = function(x, rrX = relative_rangeY, from = 0, to = 1, My = ry[2], my = ry[1]) {
newx = from + (to - from) * (x - my)/(My - my)
newx = newx * diff(rrX) + min(rrX)
return(newx)
}
dens_all = density(v2_transformed, na.rm = T)
# plot the density
densclads = lapply(clades_tips2, function(vect) {
density(v2_transformed[vect[vect < 4065]], na.rm = T)
})
dens_all = density(v2_transformed, na.rm = T)
maxy = max(dens_all$y)
for (i in 1:length(densclads)) {
maxy = max(max(densclads[[i]]$y), maxy)
}
newlower_y = xrel(c(0, lower_y, 1))[2]
for (i in 1:length(densclads)) {
lines(y = yrel(c(range(dens_all$x), densclads[[i]]$x))[-c(1:2)], x = xrel(c(maxy,
densclads[[i]]$y), from = lower_y)[-1], lwd = 2, col = grays[i])
}
polygon(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = c(newlower_y,
xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), border = NA, col = adjustcolor("gray80",
alpha.f = 0.5))
lines(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = c(newlower_y,
xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), lwd = 3)
# add the color legend
axis_y = 0.15
#
c2 = col_v2
# rc1 = range(round(( (c(v1_transformed) - range_v1[1]) /
# (range_v1[2]-range_v1[1]))*(col1_nbins-1) )+col1_offset+1, na.rm = T)
col_x = yrel(range_v2[1] + diff(range_v2) * (0:length(c2))/length(c2))
# col_x = col_x[rc1[1]:min(length(c1),rc1[2])] c1 =
# c1[rc1[1]:min(length(c1),rc1[2])]
col_y = xrel(c(0, 1), from = axis_y + 0.005, to = lower_y - 0.05)
for (i in 1:length(c2)) {
polygon(y = col_x[c(i, i + 1)][c(1, 1, 2, 2)], x = col_y[c(1, 2, 2, 1)], border = c2[i],
col = c2[i])
}
polygon(y = c(relative_rangeY[1], col_x[1])[c(1, 1, 2, 2)], x = col_y[c(1, 2, 2,
1)], border = c2[2], col = c2[2])
lx = length(c2)
polygon(y = c(relative_rangeY[2], col_x[lx])[c(1, 1, 2, 2)], x = col_y[c(1, 2, 2,
1)], border = c2[lx], col = c2[lx])
#### axis ####
# add the axis
lines(y = relative_rangeY, x = c(axis_y, axis_y), lwd = 2)
lines(y = relative_rangeY, x = -c(axis_y, axis_y), lwd = 2)
lab = c(1e-04, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2)
ticks = yrel(sort(c(range(dens_all$x), log(lab + 0.005))))
# text(0,relative_rangeY[1],expression(speciation~rates~'('~My^'-1'~')'))
for (i in 2:(1 + length(lab))) {
lines(y = ticks[c(i, i)], x = axis_y + c(-0.015, 0.015), lwd = 1.5)
text(labels = lab[i - 1], y = ticks[i], x = 0)
lines(y = ticks[c(i, i)], x = -axis_y + c(-0.015, 0.015), lwd = 1.5)
}
text(lab = expression(paste(theta["T"])), x = -0.2, y = 0.94, cex = 1)
text(lab = expression(paste(theta["W"])), x = 0.2, y = 0.94, cex = 1)
text(lab = "Genetic Diversity", x = 0, y = 1, cex = 1.1)
#### close pdf ####
if (save_pdf) dev.off()Figure 1. Mammals MCC phylogeny from Upham et al. 2019, colored with branch-specific speciation rates estimated with ClaDS2. Outer circles reflect estimated original? genetic diversity, colored in red for \(\theta_{T}\) and blue for \(\theta_{W}\) (see scale on the top right corner). Insets show distributions of tip speciation rates (inner panel, analyses on the MCC tree) and genetic diversity (top right panel) for all mammals (in black) and for 14 clades (colored) with more than 20 species, on a log scale. ClaDS2 hyper-parameters from posterior distribution of trees and MCC tree are shown in Fig. S2, while mean tip speciation rates estimates are shown in Fig. S3 and S4.
#### label clades that were used for PGLS analyses
mGendivSpRate <- spRate %>%
filter(!set %in% "treeMCC") %>%
group_by(species) %>%
dplyr::summarise(rate_mean = mean(tipRate, na.rm = TRUE), rate_sd = sd(tipRate,
na.rm = TRUE), rate_se = rate_sd/sqrt(n()), rate_lower.ci = CI(tipRate, ci = 0.95)[1],
rate_upper.ci = CI(tipRate, ci = 0.95)[3], clades = unique(clades)) %>%
arrange(clades) %>%
mutate(ord = seq(1, length(n)), species = forcats::fct_reorder(species, ord),
clades = forcats::fct_relevel(case_when(!clades %in% unique(PGLSclade$clade) ~
"Other", TRUE ~ clades), "Other", after = 14)) %>%
inner_join(., select(gen.div, species, EstPi, EstTheta, Nind), by = "species")
tcol0 <- cladesSum %>%
filter(clades %in% mGendivSpRate$clades) %>%
bind_rows(., data.frame(clades = "Other", col = "gray90"))
tcol <- c("black", as.character(tcol0[, 3]))
names(tcol) <- c("All Mammals", as.character(tcol0[, 1]))
# q30 <- ggplot(mGendivSpRate, aes(y = EstPi, x = rate_mean)) +
# geom_errorbarh(aes(xmin = rate_lower.ci, xmax = rate_upper.ci), size = 0.5,
# #alpha = 0.5, color = 'gray70') + geom_point(size = 1.2, shape = 21, fill =
# 'white', #alpha = 0.5, color = 'gray70', stroke = 0.5) +
# scale_y_continuous(trans='log10') + scale_x_continuous(trans='log10') +
# geom_smooth(fill = 'blue', color = 'black', alpha = 0.2, linetype = 'dashed',
# method=lm, position = 'identity', size = 0.8, fullrange = FALSE) + theme_bw()
# + theme(plot.margin = unit(c(0,0,0,0), 'cm'), panel.grid.major =
# element_blank(), panel.grid.minor = element_blank()) + labs(y =
# expression(paste('Genetic diversity (',theta['T'],')')), x = NULL) +
# annotate(geom='text', x=0.028, y=0.0004, label=paste0('MCC PGLS:\nEstimate =
# ', round(mccpgls['EstPi','slope'],3), '\np-value = ',
# formatC(mccpgls['EstPi','pvalue'], format = 'E', digits = 3), '\nlambda = ',
# round(mccpgls['EstPi','lambda'],3), '\nR^2 = ',
# round(mccpgls['EstPi','R2resid'],3)), size = 3.6, hjust = 0)
pglsSet2 <- pglsSet
names(pglsSet2) <- levels(droplevels(filter(mGendivSpRate, !clades %in% "Other"))$clades)
mGendivSpRate2 <- mGendivSpRate
mGendivSpRate2$clades2 <- "Mammals"
mGendivSpRate3 <- filter(mGendivSpRate, !clades %in% "Other") %>%
mutate(clades2 = clades) %>%
bind_rows(mGendivSpRate2) %>%
mutate(clades2 = fct_relevel(clades2, "Mammals", after = 0))
pglsSet2 <- c("All Mammals", pglsSet)
names(pglsSet2) <- levels(mGendivSpRate3$clades2)
q3 <- ggplot(data = mGendivSpRate3, aes(y = EstPi, x = rate_mean)) + geom_errorbarh(aes(xmin = rate_lower.ci,
color = clades2, xmax = rate_upper.ci), alpha = 0.8, show.legend = F) + scale_y_continuous(trans = "log10") +
scale_x_continuous(trans = "log10") + geom_smooth(aes(y = EstPi, x = rate_mean),
fill = "blue", color = "black", alpha = 0.2, linetype = "dashed", method = lm,
position = "identity", size = 0.5, fullrange = FALSE) + scale_colour_manual(values = tcol[-16]) +
theme_bw() + theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.spacing.x = unit(0, "lines"), panel.spacing.y = unit(0,
"lines"), strip.text = element_text(size = 10, margin = margin(0.1, 0, 0.1,
0, "cm")), strip.placement.x = "inside") + labs(y = expression(paste("Genetic diversity (",
theta["T"], ")")), x = bquote("Speciation rate " ~ (Myr^-1))) + facet_wrap(~clades2,
ncol = 5, labeller = labeller(clades2 = pglsSet2))
# ggarrange(ggarrange(NULL,q30,NULL,ncol = 1, heights = c(0.2,1,0.2)),q3,
# ncol=1)
# q <- ggarrange(q30,q3, ncol=1)
q3ggsave(paste0(fig_path, "Figure2.pdf"), q3, width = 9, height = 6)
ggsave(paste0(fig_path, "Figure2.png"), q3, width = 9, height = 6)
# ggsave('/Users/acas/Dropbox/Post-docs/Morlon_Lab/manuscript_projects_info/mammals_genDiv_SpRate/figures/PiVsRate.pdf',
# q3, width = 10, height = 8, useDingbats = FALSE )
# q3.5 <- ggplot(data = mGendivSpRate, aes(y = EstTheta, x = rate_mean)) +
# geom_errorbarh(aes(xmin = rate_lower.ci,xmax = rate_upper.ci,colour =
# clades), alpha = 0.4) + geom_point(aes(colour = clades), size = 0.5, alpha =
# 0.4) + scale_colour_manual(values = tcol, breaks = names(tcol), labels =
# c('All Mammals', pglsSet, 'Other')) + scale_y_continuous(trans='log10',
# limits = c(0.00015,0.85)) + scale_x_continuous(trans='log10') + #
# geom_vline(data = mGendivSpRate, # aes(xintercept = mean(rate_mean)),
# linetype = 2, colour = 'gray50') + # geom_hline(data = mGendivSpRate, #
# aes(yintercept = mean(EstTheta)), linetype = 2, colour = 'gray50') +
# stat_smooth(data = filter(mGendivSpRate, !clades %in% 'Other'), aes(y =
# EstTheta, x = rate_mean, colour = clades), geom='line', method=lm, position =
# 'identity', se = FALSE, fullrange = TRUE, show.legend = F, alpha = 0.6, size
# = 0.7) + geom_smooth(data = mGendivSpRate, aes(y = EstTheta, x = rate_mean,
# color = 'All Mammals'), method=lm, position = 'identity', fullrange = TRUE,
# se = F) + #ggtitle('Pi vs mean rates with CI from 100 posterior trees') +
# theme_bw() + theme(plot.margin = unit(c(0,0,0,0), 'cm'), panel.grid.major =
# element_blank(), panel.grid.minor = element_blank()) + labs(y =
# expression(paste(theta['W'])), x = bquote('Speciation rate '~(Myr^-1)),
# colour = 'Clades') + annotate(geom='text', x=0.015, y=0.0003,
# label=paste0('MCC PGLS:\nEstimate = ', round(mccpgls['EstTheta','slope'],3),
# '\np-value = ', formatC(mccpgls['EstTheta','pvalue'], format = 'E', digits =
# 3), '\nlambda = ', round(mccpgls['EstTheta','lambda'],3), '\nR^2 = ',
# round(mccpgls['EstTheta','R2resid'],3)), size = 2.5, hjust = 0) #q3.5 q33.5
# <- ggarrange(q3, q3.5, ncol = 1, nrow = 2, align = 'v', #widths = c(2.5, 1),
# heights = c(1, 2.5, 2.5), common.legend = T , legend = 'right') q33.5
# ggsave(paste0(fig_path,'Figure2.pdf'), q33.5, width = 7, height = 8,
# useDingbats = FALSE ) ggsave(paste0(fig_path,'Figure2.png'), q33.5, width =
# 7, height = 8)Figure 2. Relationship between intraspecific genetic diversity (Tajima’s T) and speciation rate for all mammal species and for each of the 14 clades with at least 20 species. Speciation rates represented by the 95% confidence intervals from 100 posterior trees. Included OLS regression lines; axis are log scaled.
pglsResult <- bind_rows(PGLSglobal,PGLSclade) %>%
filter(!set %in% 'treeMCC',
Term %in% 'log(tipRate)',
analysis %in% 'EstPi') %>%
rename(pvalue = Pr...t..) %>%
mutate(.variable = ifelse(is.na(clade), 'All Mammals', clade),
pvalueBin = ifelse(pvalue < 0.05, 'Signif.','Not signif.')) %>%
select(clade, .variable, set, Estimate, pvalue, pvalueBin)
BMLMglobalPi <- tibble()
for(g in names(BMLMglobal[['estPi']])[-101]){
gl <- BMLMglobal[['estPi']][[g]] %>%
filter(.chain %in% 1) %>%
select(-.value, -.variable, -r_clades, -.chain,-.iteration,-.draw) %>%
rename(.value = b_logtipRate) %>%
mutate(.variable = 'All Mammals') %>%
distinct()
cl <- BMLMglobal[['estPi']][[g]] %>%
filter(.chain %in% 1) %>%
select(-.chain,-.iteration,-.draw,-r_clades, -b_logtipRate) %>%
distinct()
BMLMglobalPi <- bind_rows(BMLMglobalPi,bind_rows(gl,cl))
}
pp1 <- ggplot(filter(BMLMglobalPi, .variable %in% 'All Mammals'),
aes(x = .value, y = fct_rev(.variable))) +
#stat_intervalh(.width = c(.50, .80, .95, .99)) +
geom_vline(aes(xintercept = 0), linetype= 2) +
geom_halfeyeh(fill = "gray80", point_interval = median_hdi,
.width = .95, alpha = 0.8, scale = 0.7) +
theme_bw() + xlim(-2.27, 1.04) +
theme(axis.title.x=element_blank(),axis.title.y=element_blank(),
axis.text.x=element_blank(), axis.ticks.x=element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
text = element_text(size=22)) +
geom_point(data = filter(pglsResult, .variable %in% 'All Mammals'),
aes(x = Estimate, y = fct_rev(.variable),
colour = pvalueBin), alpha = 0.8, size = 1,
position = position_nudge(y = -0.15), show.legend = F) +
scale_color_manual(values = c('red','gray80'))
pp2 <- ggplot(filter(BMLMglobalPi, !.variable %in% 'All Mammals'),
aes(x = .value, y = fct_rev(.variable))) +
geom_vline(aes(xintercept = 0), linetype= 2) +
geom_halfeyeh(fill = "gray80",
point_interval = median_hdi, .width = .95, alpha = 0.8, scale = 0.7, show.legend = F ) +
#scale_fill_manual(values = tcol[2:15]) +
scale_y_discrete(labels=rev(pglsSet)) +
theme_bw() + xlim(-2.27, 1.04) +
theme(#axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y = element_text(colour = rev(tcol[2:15])),
#axis.text.x=element_blank(), axis.ticks.x=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
text = element_text(size=22)) +
geom_point(data = filter(pglsResult, !.variable %in% 'All Mammals'),
aes(x = Estimate, y = fct_rev(.variable),
colour = pvalueBin), alpha = 0.8, size = 1,
position = position_nudge(y = -0.15), show.legend = F) +
scale_colour_manual(values = c('gray80','red')) +
labs(x = "Slope Estimate")
p = ggarrange(pp1, pp2, nrow = 2,heights = c(0.3, 2), align = "v")
pggsave(paste0(fig_path,'Figure3.pdf'), p, height = 9, width = 9)
ggsave(paste0(fig_path,'Figure3.png'), p, height = 9, width = 9)Figure 3. Slope and significance of the relationship between intraspecific genetic diversity and speciation rate for all mammals (top panel) and each of the 14 clades with at least 20 species (bottom panel). Results for Bayesian Multilevel Models (BMLM, gray density plots with median point and 95% confidence intervals) and for the Phylogenetic Generalized Least Squares (PGLS, points colored red if p-value < 0.05) analysis. Results for all genetic diversity measures and on the posterior distribution of trees are shown in Fig. S5.
pglsResulttraitsMCC <- PGLSglobal_traits %>%
filter(set %in% "treeMCC", !Term %in% "(Intercept)") %>%
rename(p = Pr...t.., SE = Std..Error) %>%
ungroup() %>%
mutate(`p-value` = ifelse(p < 0.001, "<0.001", round(p, 4)), Term = recode(Term,
`log(tipRate)` = !!"λ", `log(BodyMassKg_notInputed)` = "Body Mass", `log(geoArea_km2)` = "Range area",
`log(mean_temp)` = "Mean temperature", `log(litter_or_clutch_size_n)` = "Litter size",
`log(GenerationLength_d)` = "Generation length"), ana = "PGLS") %>%
# select(analysis, Estimate, Term, SE, `p-value`, ana)
select(analysis, Estimate, Term, SE, p, ana)
BMLMglobal_traitsExt <- tibble()
for (g in names(BMLMglobal_traits)) {
for (i in names(BMLMglobal_traits[[g]])) {
BMLMglobal_traitsExt <- bind_rows(BMLMglobal_traitsExt, BMLMglobal_traits[[g]][[i]])
}
}
BMLMglobal_traitsExt <- BMLMglobal_traitsExt %>%
select(-.chain, -.iteration, -.draw) %>%
rename(Term = .variable, Estimate = .value, analysis = model) %>%
mutate(ana = "BMLM")
BMLMglobal_traitsExtMCC <- filter(BMLMglobal_traitsExt, set %in% "treeMCC") %>%
group_by(Term, analysis) %>%
median_qi(Estimate) %>%
mutate(Term = recode(Term, b_logtipRate = !!"λ", b_logBodyMassKg_notInputed = "Body Mass",
b_loggeoArea_km2 = "Range area", b_logmean_temp = "Mean temperature", b_loglitter_or_clutch_size_n = "Litter size",
b_logGenerationLength_d = "Generation length"), ana = "BMLM", `95% CI` = paste0("[",
round(.lower, 3), "; ", round(.upper, 3), "]")) %>%
select(-(.lower:.interval))
BMLMPGLSmcc <- bind_rows(pglsResulttraitsMCC, BMLMglobal_traitsExtMCC) %>%
mutate_if(is.numeric, round, 3) %>%
pivot_wider(names_from = ana, values_from = c(Estimate, SE, p, `95% CI`)) %>%
select(-c("SE_BMLM", "p_BMLM", "95% CI_PGLS")) %>%
relocate(Estimate_BMLM, .before = "95% CI_BMLM") %>%
pivot_wider(names_from = analysis, values_from = Estimate_PGLS:`95% CI_BMLM`)
BMLMPGLSmccRed <- BMLMPGLSmcc[, c(1, 3, 6, 9, 12, 15, 4, 7, 10, 13, 16, 2, 5, 8,
11, 14)] %>%
select(-starts_with("p"))
ps <- BMLMPGLSmcc %>%
select(starts_with("p"))
typology <- tibble(col_keys = names(BMLMPGLSmccRed), name = names(BMLMPGLSmccRed)) %>%
separate(name, sep = "_", into = c("stat", "analysis", "model"))
ft <- autofit(flextable(BMLMPGLSmccRed)) %>%
set_header_df(mapping = typology[, c(1, 4, 3, 2)], key = "col_keys") %>%
merge_h(part = "header") %>%
merge_v(part = "header") %>%
flextable::compose(i = 1, j = 2, part = "header", value = as_paragraph("θ",
as_sub("T"), " ~ Traits")) %>%
flextable::compose(i = 1, j = 6, part = "header", value = as_paragraph("λ",
" ~ Traits")) %>%
flextable::compose(i = 1, j = 10, part = "header", value = as_paragraph("θ",
as_sub("T"), " ~ ", "λ", " + Traits")) %>%
colformat_num(j = colnames(BMLMPGLSmccRed), na_str = "", digits = 3) %>%
theme_booktabs(fontsize = 10) %>%
fix_border_issues() %>%
align_nottext_col(align = "center", header = TRUE) %>%
vline(j = 3, border = officer::fp_border(), part = "all") %>%
vline(j = 5, border = officer::fp_border(width = 2), part = "all") %>%
vline(j = 7, border = officer::fp_border(), part = "all") %>%
vline(j = 9, border = officer::fp_border(width = 2), part = "all") %>%
vline(j = 11, border = officer::fp_border(), part = "all") %>%
bold(j = 1, part = "all") %>%
bold(j = 2, i = which(ps$p_PGLS_piTraits < 0.001), part = "body") %>%
bold(j = 6, i = which(ps$p_PGLS_SpRateTraits < 0.001), part = "body") %>%
bold(j = 10, i = which(ps$p_PGLS_piSpRateTraits < 0.001), part = "body")
ftθT ~ Traits | λ ~ Traits | θT ~ λ + Traits | ||||||||||
PGLS | BMLM | PGLS | BMLM | PGLS | BMLM | |||||||
Term | Estimate | SE | Estimate | 95% CI | Estimate | SE | Estimate | 95% CI | Estimate | SE | Estimate | 95% CI |
λ | -0.264 | 0.077 | -0.267 | [-0.427; -0.105] | ||||||||
Body Mass | -0.145 | 0.026 | -0.148 | [-0.202; -0.091] | 0.007 | 0.010 | 0.007 | [-0.012; 0.025] | -0.139 | 0.025 | -0.143 | [-0.196; -0.093] |
Range area | 0.137 | 0.014 | 0.138 | [0.11; 0.166] | -0.005 | 0.003 | -0.005 | [-0.011; 0] | 0.131 | 0.015 | 0.132 | [0.104; 0.161] |
Mean temperature | 0.330 | 0.087 | 0.331 | [0.154; 0.502] | -0.021 | 0.018 | -0.021 | [-0.059; 0.015] | 0.307 | 0.087 | 0.306 | [0.131; 0.49] |
Litter size | -0.420 | 0.091 | -0.420 | [-0.617; -0.237] | 0.051 | 0.028 | 0.050 | [-0.004; 0.105] | -0.400 | 0.089 | -0.405 | [-0.587; -0.217] |
Generation length | -0.074 | 0.105 | -0.068 | [-0.269; 0.136] | -0.008 | 0.030 | -0.010 | [-0.068; 0.047] | -0.084 | 0.102 | -0.081 | [-0.285; 0.116] |
# save_as_image(ft, path = paste0(fig_path,'table1.png'))Table 1. Correlations between genetic diversity (\(\theta_{T}\)), speciation rates (\(\lambda\)) and traits of interest; PGLS and BMLM analyses on the MCC tree. PGLS estimates in bold have a p-value < 0.05. Results on the posterior distribution of trees are shown in Fig. S6.
pglsResultMR <- PGLSglobal_mutRate %>%
filter(!analysis %in% c("cEstPiSpRate", "piMutRate"),
!Term %in% '(Intercept)') %>%
rename(pvalue = Pr...t..) %>%
mutate(pvalueBin = ifelse(pvalue < 0.05, 'Signif.','Not signif.'),
ana = 'PGLS') %>%
select(analysis, set, Estimate, Term, pvalue, pvalueBin, ana) %>% data.frame()
pglsResultMR$analysis <- factor(pglsResultMR$analysis,
labels = c(expression(paste(mu,' ~ ', lambda)),
expression(paste(N["e"], ' ~ ', lambda))))
BMLMglobal_mutRateExt <- tibble()
for(g in names(BMLMglobal_mutRate)){
for(i in names(BMLMglobal_mutRate[[g]])){
BMLMglobal_mutRateExt <- bind_rows(BMLMglobal_mutRateExt,
BMLMglobal_mutRate[[g]][[i]])
}
}
BMLMglobal_mutRateExt <- BMLMglobal_mutRateExt %>%
select(-.chain,-.iteration,-.draw) %>%
rename(Term = .variable, Estimate = .value, analysis = model) %>%
mutate(Term = case_when(Term %in% "b_logtipRate" ~ "log(tipRate)",
Term %in% "b_logmutrate" ~ "log(mutrate)",
Term %in% "b_logNe" ~ "log(Ne)"),
ana = 'BMLM') %>%
data.frame()
BMLMglobal_mutRateExt$analysis <- factor(BMLMglobal_mutRateExt$analysis,
labels = c(expression(paste(mu,' ~ ', lambda)),
expression(paste(N["e"],' ~ ', lambda))))
sumBMLM <- BMLMglobal_mutRateExt %>%
group_by(Term, analysis, set) %>%
median_qi(Estimate) %>%
rename(medianEstimate = Estimate)
dt <- filter(BMLMglobal_mutRateExt, !set %in% 'treeMCC',
analysis %in% c('paste(mu, " ~ ", lambda)',
'paste(N["e"], " ~ ", lambda)'))
blmlplot1 <- ggplot(dt) +
stat_halfeyeh(aes(x = Estimate, y = Term),
point_interval = median_qi,
.width = .95, size = 3, alpha = 0.8) +
geom_pointintervalh(data = filter(sumBMLM, set %in% 'treeMCC',
analysis %in% c('paste(mu, " ~ ", lambda)',
'paste(N["e"], " ~ ", lambda)')),
aes(x = medianEstimate, y = Term,
xmin =.lower, xmax = .upper), shape = 17, size = 3,
fatten_point = 3, position = position_nudge(y = -0.4)) +
geom_point(data = filter(pglsResultMR, !set %in% 'treeMCC',
analysis %in% c('paste(mu, " ~ ", lambda)',
'paste(N["e"], " ~ ", lambda)')),
aes(x = Estimate, y = Term, color = pvalueBin),
size = 2, show.legend = F, alpha = 0.4,
position = position_nudge(y = -0.1)) +
scale_colour_manual(values=c('gray80','red')) +
geom_point(data = filter(pglsResultMR, set %in% 'treeMCC',
analysis %in% c('paste(mu, " ~ ", lambda)',
'paste(N["e"], " ~ ", lambda)')),
aes(x = Estimate, y = Term, color = pvalueBin),
size = 3, show.legend = F, alpha = 0.5, shape = 17,
position = position_nudge(y = -0.5)) +
geom_vline(aes(xintercept = 0), linetype= 2) +
facet_wrap(~fct_rev(analysis), labeller = label_parsed, ncol = 1,
strip.position = "right") +
theme_bw() + xlim(-0.82,0.22) +
labs(x = "Slope Estimate") +
theme(axis.title.y=element_blank(),
axis.text.y =element_blank(), axis.ticks.y =element_blank(),
strip.text = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
mgendivSpRateMutRate <- gendivSpRateMutRate %>%
filter(!set %in% 'treeMCC') %>%
group_by(species) %>%
dplyr::summarise(SpRate_mean = mean(tipRate, na.rm = TRUE),
SpRate_lower.ci = CI(tipRate, ci = 0.95)[1],
SpRate_upper.ci = CI(tipRate, ci = 0.95)[3],
MutRate_mean = mean(mutRate, na.rm = TRUE),
MutRate_lower.ci = CI(mutRate, ci = 0.95)[1],
MutRate_upper.ci = CI(mutRate, ci = 0.95)[3],
Ne_mean = mean(Ne, na.rm = TRUE),
Ne_lower.ci = CI(Ne, ci = 0.95)[1],
Ne_upper.ci = CI(Ne, ci = 0.95)[3],
EstPi = mean(EstPi, na.rm = TRUE)) %>%
drop_na() %>%
pivot_longer(cols = MutRate_mean:Ne_upper.ci,
names_to = c("variable", "x"),
names_sep = "_") %>%
pivot_wider(names_from = "x",
values_from = "value")
mgendivSpRateMutRate$variable <- factor(mgendivSpRateMutRate$variable,
labels = c(expression(paste("Mutation Rate - ", mu)),
expression(paste(N["e"]))))
## mutation rate vs speciation rate
p1 = ggplot(mgendivSpRateMutRate, aes(y = mean, x = SpRate_mean)) +
geom_errorbarh(aes(xmin = SpRate_lower.ci, xmax = SpRate_upper.ci), color = "gray70") +
geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), color = "gray70") +
geom_point(size = 0.9, color = "gray50") +
geom_smooth(method=lm, position = "identity", color = 'black', se = F) +
scale_x_log10(breaks = breaks_log(n = 8), labels = label_number()) +
scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
facet_wrap(fct_rev(variable) ~ ., labeller = label_parsed,
scales = 'free_y', ncol = 1, switch = "y" ) + theme_bw() +
labs(y = "", x = expression(paste('Speciation rate - ', lambda))) +
theme(axis.title.y=element_blank(), #axis.title.x=element_blank(),
strip.text = element_text(size = 14),
strip.placement = "outside",
strip.background.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
q4 = ggarrange(p1, blmlplot1, ncol = 2)
q4ggsave(paste0(fig_path,'Figure4.pdf'), q4, height = 7, width = 10)
ggsave(paste0(fig_path,'Figure4.png'), q4, height = 7, width = 10)Figure 4. Relationships between speciation rate and the theoretical components of \(\theta\): mutation rate (\(\mu\)) and effective population size (\(N_e\)). Plots with means (\(\mu\)) and (\(N_e\)) and 95% confidence intervals with a regression line and log scaled axis. BMLM results using rates from 100 posterior trees (all posterior distributions; circles) and MCC tree (point with confidence interval; triangles) plotted with median and 95% confidence intervals; with PGLS estimates plotted below and colored red when significant (p-value < 0.05).
gen.div0 <- read.delim(paste0(folder_path, 'genetic_diversity/GenDiv_subsample5Ind.txt')) %>%
mutate(Nind5 = case_when(Nind == 5 ~ "5 ind.",
TRUE ~ ">5 ind."),
subPi_mean = ifelse(is.na(subPi_mean), EstPi,
subPi_mean),
subTheta_mean = ifelse(is.na(subTheta_mean), EstTheta,
subTheta_mean),
outPi = ifelse(Nind == 5, "5 ind.",
as.character(outPi)),
outPi = ifelse(EstPi == 0, "out",
as.character(outPi)),
outTheta = ifelse(Nind == 5, "5 ind.",
as.character(outTheta)),
outTheta = ifelse(EstTheta == 0, "out",
as.character(outTheta)))
p = ggplot(gen.div0) +
geom_point(aes(y = EstPi, x = subPi_mean,
alpha = fct_infreq(outPi), color = fct_infreq(outPi)), shape = 1) +
scale_x_log10() + scale_y_log10() + theme_bw() +
labs(y = expression(paste("Original ", theta["T"])),
x = expression(paste("Mean subsampled ", theta['T']))) +
scale_color_manual(values = c('black',"#71D692","#805C31"),
labels = c("Included", "With 5 ind.",
"Excluded"),
name = expression(paste("Included species from ", theta['T']))) +
scale_alpha_manual(values = c(0.1, 0.4, 1),
labels = c("Included", "With 5 ind.",
"Excluded"),
name = expression(paste("Included species from ", theta['T']))) +
theme(legend.title = element_text(color = col_v1[95]))
t = ggplot(gen.div0) +
geom_point(aes(y = EstTheta, x = subTheta_mean,
alpha = fct_infreq(outTheta),
color = fct_infreq(outTheta)), shape = 1) +
scale_x_log10() + scale_y_log10() + theme_bw() +
labs(y = expression(paste("Original ", theta[W])), x = expression(paste("Mean subsampled ", theta[W]))) +
scale_color_manual(values = c('black',"#71D692","#805C31"), ##D14D36
labels = c("Included",
"With 5 ind.",
"Excluded"),
name = expression(paste("Included species from ", theta['W']))) +
scale_alpha_manual(values = c(0.1, 0.4, 1),
labels = c("Included",
"With 5 ind.",
"Excluded"),
name = expression(paste("Included species from ", theta['W']))) +
theme(legend.title = element_text(color = col_v2[90]))n1 <- ggplot(gen.div0, aes(Nind)) + geom_histogram(aes(y = ..density..), bins = 31,
fill = "gray70", colour = "white") + geom_density(fill = "white", alpha = 0.5) +
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + scale_x_log10()
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + +
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + #
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + geom_vline(aes(xintercept
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + =
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + mean(Nind)))
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + +
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + geom_vline(aes(xintercept
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + =
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + median(Nind)),
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + linetype
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + =
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + 'longdash')
scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') + +
theme_bw() + labs(y = "Counts", x = "Number of individuals") + scale_y_continuous(breaks = c(0,
0.25, 0.5, 0.75, 1), labels = function(x) sprintf("%.3f", x)) + theme(panel.grid = element_blank())
dt <- gen.div0 %>%
select(Nind, species, EstPi, EstTheta) %>%
pivot_longer(cols = EstPi:EstTheta)
n2 <- ggplot(dt, aes(x = Nind)) + geom_point(aes(y = value, colour = name), alpha = 0.4) +
scale_color_manual(values = c(col_v1[95], col_v2[90]), labels = c(expression(paste("Estimated" ~
theta["T"])), expression(paste("Estimated" ~ theta["W"]))), name = "Genetic diversity") +
# geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept =
# median(Nind)), linetype = 'longdash') +
theme_bw() + labs(y = "Genetic diversity", x = "Number of individuals") + theme(legend.position = "top",
panel.grid = element_blank()) + scale_y_continuous(trans = "log10", breaks = c(0,
0.001, 0.005, 0.01, 0.05, 0.1)) + scale_x_continuous(trans = "log10", breaks = c(5,
10, 20, 50, 100, 500, 1000))
npnt <- ggarrange(n1, p, n2, t, ncol = 2, nrow = 2, align = "hv", widths = c(0.75,
1.1, 0.75, 1.1), labels = c("A", "C", "B", "D"))
npntggsave(paste0(fig_path, "FigS1.pdf"), npnt, width = 10, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS1.png"), npnt, width = 10, height = 5)Fig. S1. Number of sequences with estimated genetic diversities obtained per species (A and B) and original vs mean subsampled genetic diversity estimates (C and D). All species with exactly five individuals were included (green in C and D). Species with genetic diversity above zero for which the estimated \(\theta\) is outside the range of \(\theta\)s 1000 subsampled replicates to five individuals were excluded (brown in C and D) for analyses.
dt <- mGendivSpRate %>%
filter(!clades %in% "Other") %>%
select(species, rate_mean, EstPi, EstTheta, clades) %>%
pivot_longer(cols = c(EstPi:EstTheta))
tr = ggplot(dt) + geom_boxplot(aes(y = value, color = name, x = clades), alpha = 0.2) +
scale_y_log10() + theme_bw() + scale_fill_manual(values = tcol[2:15]) + scale_color_manual(values = c(col_v1[95],
col_v2[90]), labels = c(expression(paste("Estimated" ~ theta["T"])), expression(paste("Estimated" ~
theta["W"])), "rate"), name = "Genetic diversity") + theme(axis.text.x = element_blank(),
axis.ticks = element_blank(), panel.grid = element_blank(), legend.position = "top",
plot.margin = unit(c(1, 1, -0.1, 1), "cm")) + guides(fill = FALSE) + labs(y = expression(theta),
x = "")
r = ggplot(dt) + geom_boxplot(aes(y = rate_mean, x = clades), alpha = 0.2) + scale_y_log10() +
theme_bw() + theme(axis.text.x = element_text(angle = 25, hjust = 1), panel.grid = element_blank(),
plot.margin = unit(c(-0.1, 1, 1, 1), "cm")) + labs(y = expression(paste("Mean ",
lambda)), x = "") + scale_x_discrete(labels = pglsSet)
q = ggarrange(tr, r, ncol = 1, align = "v")
qggsave(paste0(fig_path, "FigS2.pdf"), q, width = 10, height = 6, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS2.png"), q, width = 10, height = 6)Fig. S2. Genetic diversity (\(\theta\)) and tip speciation rates (\(\lambda\), species mean rates from 100 posterior trees) for the 14 clades with at least 20 species.
meanTipRateClade <- gendivSpRateMutRate %>%
filter(!set %in% "treeMCC") %>%
group_by(species) %>%
dplyr::summarise(SpRate_mean = mean(tipRate, na.rm = TRUE)) %>%
left_join(., clades, by = "species")
p <- ggplot() + stat_density(data = filter(meanTipRateClade, clades %in% tcol0$clades[-15]),
aes(SpRate_mean, colour = clades), geom = "line", position = "identity", size = 0.5,
show.legend = T) + scale_colour_manual(values = tcol[2:15], labels = pglsSet) +
labs(x = bquote("Mean tip speciation rates " ~ (Myr^-1)), y = "Density", colour = "Clades") +
geom_density(data = meanTipRateClade, aes(x = SpRate_mean), fill = NA, size = 1) +
geom_vline(data = meanTipRateClade, aes(xintercept = mean(SpRate_mean)), linetype = 2,
colour = "gray50") + theme_bw() + theme(panel.background = element_rect(fill = "#ffffff"),
panel.grid = element_blank(), legend.position = "bottom") + scale_x_continuous(trans = "log10")
pggsave(paste0(fig_path, "FigS3.pdf"), p, width = 8, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS3.png"), p, width = 8, height = 5)Fig. S3. Global (black) distribution of species-specific speciation rates estimated from 100 posterior trees and for 14 clades (colored) with more than 20 species for both speciation rates and genetic diversity data.
library(nlme)
library(tidyverse)
library(rr2) ## Ives et al. 2018 R2s
library(caper)
pglsList <- list.files(paste0(folder_path, "pgls/global/output"))
for (pgs in pglsList) {
pgs <- pglsList[repi]
tree <- word(tools::file_path_sans_ext(pgs), sep = "_", 4, 4)
pglspirateMCC <- readRDS(paste0(folder_path, "pgls/global/output/", pgs))$EstPi
dtset <- pglspirateMCC$data$data %>%
rownames_to_column("species")
fit1 <- gls(log(EstPi) ~ log(tipRate), data = dtset, correlation = corPagel(1,
phy = pglspirateMCC$data$phy, form = ~species), method = "ML")
r2 <- R2(mod = fit1, pred = FALSE) ## pred takes a lot of time and I don't think it makes sense with something that is so badly predicted by the model, the focus is more in just showing the range of correlation across posterior trees, we already know the correlation strength is really weak.
saveRDS(c(fit1, r2), paste0(folder_path, "pgls/global/outputR2_rr2/", tree, ".rds"))
}
####
pglsList <- list.files(paste0(folder_path, "pgls/global/outputR2_rr2"))
dtR2 <- data.frame()
for (tree in pglsList) {
Tree <- tools::file_path_sans_ext(tree)
fit1 <- readRDS(paste0(folder_path, "pgls/global/outputR2_rr2/", tree))
class(fit1) <- "gls"
dtR2 <- bind_rows(dtR2, cbind(Tree, as.data.frame(summary(fit1)$tTable)[2, c(1,
4)], as.character(summary(fit1)$modelStruct), fit1$R2_lik, fit1$R2_resid))
}
colnames(dtR2) <- c("set", "Estimate", "pvalue", "lambda", "R2_lik", "R2_resid")
saveRDS(dtR2, paste0(folder_path, "pgls/global/rr2Output.rds"))### make boxplot with each panel for slope estimate, p-value, lambda and
### R2resid
# Also shown is the PGLS slope estimate, significance, lambda transformation
# and explained variance for the consensus tree (R2resid; estimated as Ives
# 2019).
dtR2 <- readRDS(paste0(folder_path, "pgls/global/rr2Output.rds")) %>%
mutate(lambda = as.numeric(lambda)) %>%
pivot_longer(cols = -set)
dtR2$name <- factor(dtR2$name, levels = c("Estimate", "pvalue", "lambda", "R2_resid",
"R2_lik"))
p <- ggplot(filter(dtR2, !set %in% "treeMCC"), aes(x = name, y = value)) + geom_boxplot(outlier.shape = NA) +
geom_jitter(alpha = 0.4, size = 0.7) + geom_point(data = filter(dtR2, set %in%
"treeMCC"), aes(x = name, y = value), color = "red", size = 1.2) + facet_wrap(~name,
scales = "free", nrow = 1) + ggh4x::facetted_pos_scales(y = list(name == "pvalue" ~
scale_y_log10())) + theme_bw() + theme(strip.background = element_blank(), strip.text.x = element_blank()) +
labs(x = "", y = expression(paste(theta["T"], "~", lambda, " PGLS output"))) +
scale_x_discrete(breaks = levels(dtR2$name), labels = c("Slope estimate", "p-value",
expression(paste("Pagel's ", lambda)), expression(paste("R"["resid"]^"2")),
expression(paste("R"["lik"]^"2"))))
pggsave(paste0(fig_path, "FigS4.pdf"), p, width = 8, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS4.png"), p, width = 8, height = 5)Fig. S4. Output from PGLS analysis using \(\theta_{T}\) as response variable with \(R^2\) estimates from Ives 2019, across 100 posterior trees (black) and MCC tree (red).
###prepare PGLS plotting
pgls2 <- bind_rows(PGLSglobal, PGLSclade) %>%
filter(Term %in% 'log(tipRate)') %>%
select(c('clade','set','analysis','Estimate','Pr...t..')) %>%
mutate(colour = case_when(is.na(clade) ~ 'All Mammals',
TRUE ~ 'clades'),
size = case_when(set %in% 'treeMCC' ~ 'MCC',
TRUE ~ 'Posterior'),
pvalue = ifelse(Pr...t.. < 0.05, "sig.", "not sig."),
shape = paste0(analysis,"_", pvalue),
analysis = fct_relevel(analysis,"EstPi", "subPi", "EstTheta","subtheta"),
clade = ifelse(is.na(clade), 'ALL', as.character(clade))) %>%
arrange(analysis)
sig <- c('black','white')
ppgls <- ggplot(pgls2) +
geom_point(aes(y = Estimate, x = clade,
group = analysis,
size = fct_infreq(size),
colour = fct_inorder(shape),
shape = fct_rev(pvalue),
fill = fct_inorder(shape)
),
stroke = 0.5, #alpha = 0.2,
position = position_dodge(width = 0.65)) +
geom_vline(xintercept = 1.5, size = 0.2, linetype = 'dashed') +
geom_hline(yintercept = 0, size = 0.2) +
theme_bw() +
scale_shape_manual(name = 'PGLS sig.',
values = c(23, 21),
labels = c("< 0.05", "> 0.05"),
guide = guide_legend(override.aes =
list(color = c("black","gray90"),
alpha = 1), nrow=2)
) +
scale_colour_manual(name = 'PGLS sig.',
values = rep(sig,4),
guide = "none"
#breaks = unique(pgls2$shape)[1:2],
# labels = c("< 0.05", "> 0.05"),
# guide = guide_legend(override.aes =
# list(color = c("gray90","black"),
# alpha = 1), nrow=2)
) +
scale_fill_manual(name = 'Genetic diversity',
values = c(col_v1[98], col_v1[98],
col_v1[70], col_v1[70],
col_v2[98], col_v2[98],
col_v2[70], col_v2[70]),
labels = c(expression(paste("Original" ~ theta['T'])),
expression(paste("Subsampled" ~ theta['T'])),
expression(paste("Original" ~ theta['W'])),
expression(paste("Subsampled" ~ theta['W']))),
guide = guide_legend(override.aes =
list(color = c(col_v1[98], col_v1[70],
col_v2[98], col_v2[70],
NA,NA,NA,NA),
alpha = 1), nrow=2, order = 1)
) +
scale_size_manual(name = 'Tree set',
values = c(1.5,4),
labels = c("Posterior trees","MCC tree"),
guide = guide_legend(nrow=2)) +
scale_x_discrete(labels=c("All Mammals", pglsSet)) +
theme(legend.position ="top",
plot.margin = unit(c(0,0,0,0), "cm"),
legend.spacing.y = unit(0.1, 'cm'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
labs(y = "PGLS estimates", x = "") +
scale_y_continuous(limits = c(-1.8,0.65)) ## prepare BMLM plotting
BMLMglobalsum <- tibble()
for (i in names(BMLMglobal)) {
for (g in names(BMLMglobal[[i]])) {
gl <- BMLMglobal[[i]][[g]] %>%
group_by(model, set) %>%
median_qi(b_logtipRate) %>%
rename(.value = b_logtipRate) %>%
mutate(.variable = "All Mammals")
cl <- BMLMglobal[[i]][[g]] %>%
group_by(.variable, model, set) %>%
median_qi(.value)
BMLMglobalsum <- bind_rows(BMLMglobalsum, bind_rows(gl, cl))
}
}
BMLMglobalsum1 <- BMLMglobalsum %>%
filter(.variable %in% "All Mammals") %>%
select(-.width, -.point, -.interval, -.variable) %>%
mutate(colour = fct_relevel(case_when(set %in% "treeMCC" ~ "MCC", TRUE ~ "Posterior"),
"Posterior", "MCC")) %>%
pivot_wider(names_from = model, values_from = c(.value, .lower, .upper)) %>%
arrange(colour)
BMLMglobalsum2 <- BMLMglobalsum %>%
select(-.width, -.point, -.interval) %>%
mutate(colour = case_when(.variable %in% "All Mammals" ~ "All Mammals", TRUE ~
"clades"), size = case_when(set %in% "treeMCC" ~ "MCC", TRUE ~ "Posterior"),
analysis = fct_relevel(model, "estPi", "subPi", "estTheta", "subTheta"),
clade = ifelse(.variable %in% "All Mammals", "ALL", as.character(.variable))) %>%
arrange(analysis)boldSet <- c("All Mammals", pglsSet)
bold.labels <- ifelse(boldSet %in% c("All Mammals", "Eulipotyphla", "Guinea Pig-related",
"Lagomorpha", "Muridae", "Squirrel-related", "Yinpterochiroptera"), yes = "bold",
no = "plain")
pbmlm <- ggplot() + geom_pointinterval(data = filter(BMLMglobalsum2, set %in% "treeMCC"),
aes(y = .value, x = clade, ymin = .lower, ymax = .upper, color = analysis, group = analysis),
alpha = 0.8, point_alpha = 0.8, point_size = 4, size = 4, position = position_dodge(width = 0.85)) +
geom_pointinterval(data = filter(BMLMglobalsum2, !set %in% "treeMCC"), aes(y = .value,
x = clade, ymin = .lower, ymax = .upper, group = analysis, colour = analysis),
alpha = 0.07, point_alpha = 0.1, point_size = 0.7, size = 0.7, position = position_jitterdodge(jitter.width = 0.65,
dodge.width = 0.85)) + geom_vline(xintercept = 1.5, size = 0.2, linetype = "dashed") +
geom_hline(yintercept = 0, size = 0.2) + scale_color_manual(name = "Genetic diversity",
values = c(col_v1[98], col_v1[70], col_v2[98], col_v2[70]), guide = "none") +
scale_x_discrete(labels = boldSet) + theme_bw() + theme(legend.position = "bottom",
plot.margin = unit(c(0, 0, 0, 0), "cm"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 30, hjust = 1,
face = bold.labels)) + labs(y = "BMLM estimates", x = "") + scale_y_continuous(limits = c(-1.8,
0.65))
qs4 = ggpubr::ggarrange(ppgls, pbmlm, ncol = 1, nrow = 2, common.legend = T, legend = "bottom",
align = "v", heights = c(0.8, 1))
qs4ggsave(paste0(fig_path, "FigS5.pdf"), qs4, width = 9, height = 6, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS5.png"), qs4, width = 9, height = 6)Fig. S5. Estimates from PGLS (top) and BMLM (bottom) analyses of the slope of the relationship between genetic diversity and speciation rates across all mammals (on the left) and each of the 14 clades with at least 20 species (on the right). Analyses were performed using either original estimates of \(\theta_{T}\) and \(\theta_{W}\) or mean estimates over 1000 subsampled replicates, and for the MCC tree as well as 100 posterior trees.
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))
gendivSpRateN <- spRate %>%
left_join(., select(gen.div, species, EstPi, Nind), by = "species")
DataSubsetMCC10 <- gendivSpRateN %>%
filter(set %in% "treeMCC", Nind > 9)
Nspecies10 <- DataSubsetMCC10 %>%
group_by(clades) %>%
dplyr::summarise(NPisp10 = n())
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubsetMCC10$species)]))
datacomp <- comparative.data(data = DataSubsetMCC10, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
# modpi10 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi10, paste0(folder_path,'pgls/extra/modpi10.rds'))
modpi10 <- readRDS(paste0(folder_path, "pgls/extra/modpi10.rds"))
# summary(modpi10)
#####
DataSubsetMCC20 <- gendivSpRateN %>%
filter(set %in% "treeMCC", Nind > 19)
Nspecies20 <- DataSubsetMCC20 %>%
group_by(clades) %>%
dplyr::summarise(NPisp20 = n())
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubsetMCC20$species)]))
datacomp <- comparative.data(data = DataSubsetMCC20, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
# modpi20 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi20, paste0(folder_path,'pgls/extra/modpi20.rds'))
modpi20 <- readRDS(paste0(folder_path, "pgls/extra/modpi20.rds"))
# summary(modpi20)
#####
DataSubsetMCC50 <- gendivSpRateN %>%
filter(set %in% "treeMCC", Nind > 49)
Nspecies50 <- DataSubsetMCC50 %>%
group_by(clades) %>%
dplyr::summarise(NPisp50 = n())
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubsetMCC50$species)]))
datacomp <- comparative.data(data = DataSubsetMCC50, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
# modpi50 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi50, paste0(folder_path,'pgls/extra/modpi50.rds'))
modpi50 <- readRDS(paste0(folder_path, "pgls/extra/modpi50.rds"))
# summary(modpi50)
#####
DataSubsetMCC75 <- gendivSpRateN %>%
filter(set %in% "treeMCC", Nind > 74)
Nspecies75 <- DataSubsetMCC75 %>%
group_by(clades) %>%
dplyr::summarise(NPisp75 = n())
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubsetMCC75$species)]))
datacomp <- comparative.data(data = DataSubsetMCC75, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
# modpi75 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi75, paste0(folder_path,'pgls/extra/modpi75.rds'))
modpi75 <- readRDS(paste0(folder_path, "pgls/extra/modpi75.rds"))
# summary(modpi75)
#####
DataSubsetMCC100 <- gendivSpRateN %>%
filter(set %in% "treeMCC", Nind > 99)
Nspecies100 <- DataSubsetMCC100 %>%
group_by(clades) %>%
dplyr::summarise(NPisp100 = n())
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubsetMCC100$species)]))
datacomp <- comparative.data(data = DataSubsetMCC100, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
# modpi100 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi100, paste0(folder_path,'pgls/extra/modpi100.rds'))
modpi100 <- readRDS(paste0(folder_path, "pgls/extra/modpi100.rds"))
# summary(modpi100)#modpirateMCC <- readRDS(paste0(folder_path,'pgls/extra/pglsPispRateMCC.rds'))
Nspecies5 <- gendivSpRate %>%
filter(set %in% 'treeMCC') %>%
group_by(clades) %>%
dplyr::summarise(Nsp = n(),
NPisp5 = sum(!is.na(EstPi)))
NspeciesSF <- left_join(Nspecies5,Nspecies10) %>%
left_join(Nspecies20) %>%
left_join(Nspecies50) %>%
left_join(Nspecies75) %>%
left_join(Nspecies100) %>%
pivot_longer(cols=-c(clades,Nsp)) %>%
mutate(SF = value/Nsp) %>%
filter(clades %in% unique(PGLSclade$clade))
SFPlot <- ggplot(NspeciesSF,
aes(x = fct_inorder(str_replace(name, "NPisp","")),
y = SF)) +
geom_boxplot() +
geom_point(aes(color = clades)) +
scale_colour_manual(values = tcol[2:15],
labels = pglsSet) +
theme_bw() +
labs(y = "Sampling fraction of available species\ngiven a threshold",
x = "Threshold for min. number of ind. per alignment") +
theme(legend.position = 'bottom')
NspThres <- NspeciesSF %>%
dplyr::group_by(name) %>%
dplyr::summarize(Nsp = sum(value, na.rm = T)) %>%
rename(thres = name)
pglsSum <- PGLSglobal %>%
filter(set %in% 'treeMCC',
analysis %in% 'EstPi',
Term %in% "log(tipRate)") %>%
select(Estimate, Pr...t..) %>%
mutate(thres = 'NPisp5') %>%
bind_rows(.,data.frame(thres = 'NPisp10', t(summary(modpi10)$coefficients[2,c(1,4)])),
data.frame(thres = 'NPisp20', t(summary(modpi20)$coefficients[2,c(1,4)])),
data.frame(thres = 'NPisp50', t(summary(modpi50)$coefficients[2,c(1,4)])),
data.frame(thres = 'NPisp75', t(summary(modpi75)$coefficients[2,c(1,4)])),
data.frame(thres = 'NPisp100', t(summary(modpi100)$coefficients[2,c(1,4)]))) %>%
left_join(NspThres)
pglsThres <- ggplot(pglsSum, aes(x = Nsp, y = Estimate)) +
geom_point(aes(color = Pr...t..)) +
ggrepel::geom_label_repel(aes(label = str_replace(thres, "NPisp",""))) +
theme_bw()+
scale_x_continuous(breaks=pglsSum$Nsp) +
scale_color_gradient(name = "P-value", #trans = "log",
breaks = c(1e-10, 0.05),
labels = c(1e-10, 0.05)) +
labs(x = "Number species in PGLS analysis for a given threshold",
y = "PGLS slope estimate") +
theme(legend.position = 'bottom',
panel.grid.minor = element_blank()) gendivThres <- ggplot(data = filter(gendivSpRate, set %in% 'treeMCC'),
aes(y = EstPi, x = tipRate)) +
geom_point(size = 2, alpha = 0.1) +
scale_y_continuous(trans='log10',expand=c(0,0)) +
scale_x_continuous(trans='log10',expand=c(0,0)) +
stat_smooth(aes(color = '5'), method=lm, position = "identity",
size=0.7, se = FALSE, geom = 'line', fullrange = TRUE) +
stat_smooth(data = DataSubsetMCC10,
aes(y = EstPi, x = tipRate, color = "10"), geom = 'line',
linetype = 'dashed', show_guide=TRUE,
method=lm, se = FALSE, position = "identity", size=0.7,
fullrange = TRUE) +
stat_smooth(data = DataSubsetMCC20,
aes(y = EstPi, x = tipRate, color = "20"), geom = 'line',
linetype = 'dashed', show_guide=TRUE,
method=lm, se = FALSE, position = "identity", size=0.7,
fullrange = TRUE) +
stat_smooth(data = DataSubsetMCC50,
aes(y = EstPi, x = tipRate, color = "50"), geom = 'line',
linetype = 'dashed', show_guide=TRUE,
method=lm, se = FALSE, position = "identity", size=0.7,
fullrange = TRUE) +
stat_smooth(data = DataSubsetMCC75,
aes(y = EstPi, x = tipRate, color = "75"), geom = 'line',
linetype = 'dashed', show_guide=TRUE,
method=lm, se = FALSE, position = "identity", size=0.7,
fullrange = TRUE) +
stat_smooth(data = DataSubsetMCC100,
aes(y = EstPi, x = tipRate, color = "100"), geom = 'line',
linetype = 'dashed', show_guide=TRUE,
method=lm, se = FALSE, position = "identity", size=0.7,
fullrange = TRUE) +
theme_bw() +
theme(#plot.margin = unit(c(0,0,0,0), "cm"),
legend.position = "right") +
labs(y = expression(paste(theta['T'])),
x = bquote('Speciation rate '~(Myr^-1))) +
scale_colour_manual(name="Threshold of number of\nindividuals per alignment",
values = c("5"="black","10"="lightskyblue",
"20"="olivedrab","50"="orange2",
"75"="red2","100"="purple"),
breaks = c("5","10","20","50","75","100"))
qjoint <- ggarrange(ggarrange(NULL, gendivThres, widths = c(0.2,1),
labels = c("","A")),
ggarrange(SFPlot, pglsThres, nrow = 1, align = 'hv',
labels = c("B","C")),
ncol = 1,
heights = c(1,1.2), align = 'hv')
qjointggsave(paste0(fig_path, 'FigS6.pdf'), qjoint,
width = 16, height = 10, useDingbats = FALSE )
ggsave(paste0(fig_path, 'FigS6.png'), qjoint,
width = 16, height = 10)Fig. S6. Differences in threshold for number of individuals per species alignment. Slope of the OLS given six thresholds for minimum number of sequences (A), with continuous line for the used threshold across all analyses (5 individuals) and grey points for used species with speciation rates from MCC tree. The sampling fraction per clade for each threshold (B) was estimated relative to the total number of species in the DNA-only phylogeny. Relationship between PGLS slopes (MCC tree) and the number of species in the analysis (C) given the threshold of number of sequences per species alignment.
pglsResulttraits <- PGLSglobal_traits %>%
filter(!Term %in% '(Intercept)') %>%
rename(pvalue = Pr...t..) %>%
mutate(pvalueBin = ifelse(pvalue < 0.05, 'Signif.','Not signif.'),
Term = recode(Term, `log(tipRate)` = "Speciation rate",
`log(BodyMassKg_notInputed)` = "Body Mass",
`log(geoArea_km2)` = "Range area",
`log(mean_temp)` = "Mean temperature",
`log(litter_or_clutch_size_n)` = "Litter size",
`log(GenerationLength_d)` = "Generation length")) %>%
select(analysis, set, Estimate, Term, pvalue, pvalueBin)
pglsResulttraits$Term <- factor(pglsResulttraits$Term,
levels = c("Speciation rate", "Body Mass", "Range area",
"Mean temperature", "Litter size",
"Generation length"))
pglsResulttraits$analysis <- factor(pglsResulttraits$analysis,
levels = c("piTraits",
"SpRateTraits",
"piSpRateTraits"),
labels = c(expression(paste(theta["T"]," ~ Traits")),
expression(paste(lambda, " ~ Traits")),
expression(paste(theta["T"],' ~ ', lambda, " + Traits"))))
pp1 <- ggplot(data = filter(pglsResulttraits, !set %in% 'treeMCC',
analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")')) +
geom_vline(aes(xintercept = 0), linetype= 2) +
geom_point(aes(x = Estimate, y = fct_rev(fct_inorder(Term)), color = pvalueBin),
alpha = 0.2, size = 2, show.legend = F) +
facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free_y', labeller = label_parsed) +
theme_bw() + labs(x = "Slope Estimate") +
xlim(-0.46, 0.34) +
scale_colour_manual(values=c('gray80','red')) +
theme(axis.title.y=element_blank(), #axis.title.x=element_blank(),
#axis.text.x=element_blank(), #axis.ticks.x=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
text = element_text(size=15))
pp2 <- ggplot(data = droplevels(filter(pglsResulttraits, !set %in% 'treeMCC',
!analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")'))) +
geom_vline(aes(xintercept = 0), linetype= 2) +
geom_point(aes(x = Estimate, y = fct_rev(fct_inorder(Term)), color = pvalueBin),
alpha = 0.2, size = 2, show.legend = F) +
facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free', labeller = label_parsed) +
theme_bw() + xlim(-0.46, 0.34) +
labs(title = 'PGLS') +
scale_colour_manual(values=c('gray80','red')) +
theme(axis.title.y=element_blank(), axis.title.x=element_blank(),
axis.text.x=element_blank(), axis.ticks.x=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
text = element_text(size=15))
p = ggarrange(pp2, pp1, nrow = 2, heights = c(1.7, 1), align = "v")
BMLMglobal_traitsExtPos <- BMLMglobal_traitsExt %>%
mutate(Term = recode(Term, b_logtipRate = "Speciation rate",
b_logBodyMassKg_notInputed = "Body Mass",
b_loggeoArea_km2 = "Range area",
b_logmean_temp = "Mean temperature",
b_loglitter_or_clutch_size_n = "Litter size",
b_logGenerationLength_d = "Generation length"),
ana = 'BMLM')
BMLMglobal_traitsExtPos$Term <- factor(BMLMglobal_traitsExtPos$Term,
levels = c("Speciation rate", "Body Mass", "Range area",
"Mean temperature", "Litter size",
"Generation length"))
BMLMglobal_traitsExtPos$analysis <- factor(BMLMglobal_traitsExtPos$analysis,
levels = c("piTraits",
"SpRateTraits",
"piSpRateTraits"),
labels = c(expression(paste(theta["T"]," ~ Traits")),
expression(paste(lambda, " ~ Traits")),
expression(paste(theta["T"],' ~ ', lambda, " + Traits"))))
b1 <- ggplot(data = filter(BMLMglobal_traitsExtPos, !set %in% 'treeMCC',
analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")')) +
geom_vline(aes(xintercept = 0), linetype= 2) +
geom_halfeyeh(aes(x = Estimate, y = fct_rev(fct_inorder(Term))),
point_interval = median_qi, .width = .95, scale = 1, #size = 4,
#position = position_nudge(y = 0.1),
alpha = 0.8) +
facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free_y', labeller = label_parsed) +
labs(x = "Slope Estimate") +
theme_bw() + xlim(-0.89, 0.81) +
theme(
axis.title.y=element_blank(),
axis.text.y=element_blank(), axis.ticks.y=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
text = element_text(size=15))
b2 <- ggplot(data = droplevels(filter(BMLMglobal_traitsExtPos, !set %in% 'treeMCC',
!analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")'))) +
geom_vline(aes(xintercept = 0), linetype= 2) +
geom_halfeyeh(aes(x = Estimate, y = fct_rev(fct_inorder(Term))),
point_interval = median_qi, .width = .95, scale = 1, #size = 4,
#position = position_nudge(y = 0.1),
alpha = 0.8) +
facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free_y', labeller = label_parsed ) +
labs(title = 'BMLM') +
theme_bw() + xlim(-0.89, 0.81) +
theme(axis.title=element_blank(),
#axis.title.y=element_blank(),
axis.text=element_blank(), axis.ticks=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
text = element_text(size=15))
bb = ggarrange(b2, b1, nrow = 2, heights = c(1.7, 1), align = "v")
q5 <- ggarrange(p,bb,ncol = 2, widths = c(1.35,1), align = "hv")
q5ggsave(paste0(fig_path, 'FigS7.pdf'), q5, width = 10, height = 8, useDingbats = FALSE )
ggsave(paste0(fig_path, 'FigS7.png'), q5, width = 10, height = 8)Fig. S7. Results for the three models to test the effect of including life-history traits in the model with genetic diversity and speciation rate. The model with speciation rate as predictor suggest the association between genetic diversity and speciation rate is not due to traits affecting both variables. Analyses using the posterior dataset with PGLS estimates colored red when significant (p-value < 0.05) and BMLM results plotted with median and 95% confidence intervals from samples draw from all posterior dataset analyses.
## pi vs mutation rate & Ne
p3 = ggplot(mgendivSpRateMutRate, aes(y = EstPi, x = mean)) + geom_errorbar(aes(xmin = lower.ci,
xmax = upper.ci), color = "gray70") + geom_point(size = 0.9, color = "gray50") +
stat_smooth(method = lm, color = "black", se = F, fullrange = T) + scale_x_log10() +
scale_y_log10(breaks = c(0.001, 0.01, 0.1), labels = c(0.001, 0.01, 0.1), limits = c(0.00017,
0.15)) + facet_wrap(~fct_rev(variable), labeller = label_parsed, scales = "free_x",
switch = "x") + theme_bw() + labs(y = expression(paste("Genetic diversity - ",
theta["T"]))) + theme(axis.title.x = element_blank(), axis.title.y = element_text(size = 14),
strip.text = element_text(size = 14), strip.placement = "outside", strip.background.x = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p3ggsave(paste0(fig_path, "FigS8.pdf"), p3, width = 9, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS8.png"), p3, width = 9, height = 5)Fig. S8. Relationships between genetic diversity (\(\theta_{T}\)) and the theoretical components of \(\theta\): effective population size (\(N_e\)) and mutation rate (\(\mu\)). Plots with means (\(N_e\)) and (\(\mu\)) and 95% confidence intervals with a regression line and log scaled axis.
#### with at least 10 individuals to allow for more variants within alignments
#### and only for species with non null variation for all codon sites
wd <- paste0(folder_path, "pgls/global_perSite/output/")
dir.create(wd, recursive = T)
#### first detect species are out of frame and remove them from data set
library(bioseq)
fastas <- list.files(paste0(folder_path, "superCrunch_family/Output_alignments/"),
pattern = "fasta", full.names = T)
checkFrame <- data.frame()
for (i in fastas) {
dna <- read_fasta(i)
if (length(dna) > 4) {
species <- tools::file_path_sans_ext(basename(i))
trans <- seq_translate(dna, code = 2, codon_frame = 1)
checkFrame <- bind_rows(checkFrame, data.frame(species = species, check = ifelse(any(str_detect(trans,
"\\*")), "notInFrame1", "inFrame1")))
if (checkFrame[checkFrame$species %in% species, "check"] %in% "notInFrame1") {
dna <- read_fasta(i)
trans <- seq_translate(dna, code = 2, codon_frame = 2)
checkFrame[checkFrame$species %in% species, "check"] <- ifelse(any(str_detect(trans,
"\\*")), "notInFrame12", "inFrame2")
}
if (checkFrame[checkFrame$species %in% species, "check"] %in% "notInFrame12") {
trans <- seq_translate(dna, code = 2, codon_frame = 3)
checkFrame[checkFrame$species %in% species, "check"] <- ifelse(any(str_detect(trans,
"\\*")), "notInFrame123", "inFrame3")
}
}
}
write.table(checkFrame, row.names = FALSE, quote = FALSE, sep = "\t", paste0(folder_path,
"genetic_diversity/speciesFrame.txt"))
# discard species without genetic diversity, species which either mean
# subsampled pi or theta are outside the range of 1000 subsamples to 5
# individuals, and species not in frame1 (only about 50 some species are not in
# frame)
gen.div <- read.delim(paste0(folder_path, "genetic_diversity/GenDiv_subsample5Ind.txt")) %>%
filter(EstPi > 0, !outPi %in% "out", !outTheta %in% "out", species %in% checkFrame[checkFrame$check %in%
"inFrame1", "species"])
clades <- read.delim(paste0(folder_path, "speciation_rate/input/MamPhy_5911sp_tipGenFamOrdCladeGenesSampPC.txt")) %>%
drop_na(PC) %>%
mutate(species = word(tiplabel, 1, 2, sep = "_"), clades = sub("^PC\\d+_", "",
PC)) %>%
dplyr::select(species, clades)
spRate <- read.delim(paste0(folder_path, "speciation_rate/output/MCCposterior100_tipRate.txt")) %>%
rename(set = treeN) %>%
left_join(., clades, by = "species")
GendivSpRate10 <- spRate %>%
arrange(clades) %>%
inner_join(., select(gen.div, species, Nind, EstPi, EstPi_1:EstPi_3), by = "species") %>%
filter(Nind > 10, EstPi > 0, EstPi_1 > 0, EstPi_2 > 0, EstPi_3 > 0) %>%
droplevels()
saveRDS(GendivSpRate10, paste0(folder_path, "genetic_diversity/GendivSpRate10.rds"))
repiTree <- unique(GendivSpRate10$set)[repi]
DataSubset <- GendivSpRate10 %>%
filter(set %in% repiTree) %>%
drop_na()
## load phylogenies and speciation rates from 100 posterior trees + MCC tree
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata")) #loads TreeSet
treeMCC <- TreeSet[[repiTree]] ### to run per tree in the cluster
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubset$species)]))
TreeSubset
datacomp <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
tryCatch({
modEstPi <- NULL
modEstPi <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = "ML")
modEstPi1 <- NULL
modEstPi1 <- pgls(log(EstPi_1) ~ log(tipRate), data = datacomp, lambda = "ML")
modEstPi2 <- NULL
modEstPi2 <- pgls(log(EstPi_2) ~ log(tipRate), data = datacomp, lambda = "ML")
modEstPi3 <- NULL
modEstPi3 <- pgls(log(EstPi_3) ~ log(tipRate), data = datacomp, lambda = "ML")
}, error = function(e) {
cat("Error in", conditionMessage(e), "\n")
})
pgls <- list(EstPi = modEstPi, EstPi1 = modEstPi1, EstPi2 = modEstPi2, EstPi3 = modEstPi3)
saveRDS(pgls, paste0(wd, "global_gendivSpRatePerSite_results_", repiTree, ".rds"))## load gendiv object that was filtered out of species with cytb out of frame 1
GendivSpRate10 <- readRDS(paste0(folder_path, "genetic_diversity/GendivSpRate10.rds"))
mGendivSpRateSite <- GendivSpRate10 %>%
filter(set %in% "treeMCC") %>%
pivot_longer(cols = EstPi:EstPi_3) %>%
mutate(bp = "Boxplot", corr = "OLS")
mGendivSpRateSite$name <- factor(mGendivSpRateSite$name, labels = c(expression(paste("Total ",
theta["T"])), expression(paste("First position ", theta["T"])), expression(paste("Second position ",
theta["T"])), expression(paste("Third position ", theta["T"]))))
pglssum101 <- readRDS(paste0(folder_path, "pgls/global_perSite/global_gendivSpRatePerSite_PGLSresults.rds")) %>%
filter(term %in% "log(tipRate)") %>%
remove_rownames()
pgls100 <- pglssum101 %>%
filter(!set %in% "treeMCC") %>%
mutate(pvalueBin = ifelse(Pr...t.. < 0.01, "Signif.", "Not signif."), pgls = "PGLS",
name = analysis) %>%
select(set, pgls, name, Estimate, pvalueBin)
pgls100$name <- factor(pgls100$name, labels = c(expression(paste("Total ", theta["T"])),
expression(paste("First position ", theta["T"])), expression(paste("Second position ",
theta["T"])), expression(paste("Third position ", theta["T"]))))
meanTreeEstimate <- pgls100 %>%
group_by(name) %>%
dplyr::summarize(mean(Estimate))
pglssumMCC <- pglssum101 %>%
filter(set %in% "treeMCC") %>%
mutate(name = unique(mGendivSpRateSite$name), pgls = "PGLS", estimate = paste("MCC Estimate =",
round(Estimate, 4)), pvalue = paste("MCC p-value =", formatC(Pr...t.., format = "E",
digits = 3)), label = paste(estimate, pvalue, collapse = "\n")) %>%
left_join(meanTreeEstimate)
ss <- ggplot(mGendivSpRateSite, aes(y = value)) + geom_boxplot() + scale_y_log10() +
theme_bw() + labs(y = "Genetic diversity", x = " ") + facet_grid(bp ~ name, labeller = label_parsed) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), strip.text = element_text(size = 12),
panel.spacing.x = unit(0, "lines"), panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ps <- ggplot(mGendivSpRateSite, aes(x = tipRate, y = value)) + geom_point(color = "gray50") +
geom_smooth(method = "lm", se = F, color = "black") + facet_grid(corr ~ name,
labeller = label_parsed) + scale_y_log10() + scale_x_log10(breaks = c(0.05, 0.1,
0.5, 1)) + theme_bw() + labs(y = "Genetic diversity", x = "Speciation rate (MCC tree)") +
theme(strip.text.x = element_blank(), panel.spacing.x = unit(0, "lines"), strip.text.y = element_text(size = 12),
panel.grid.major = element_blank(), panel.grid.minor = element_blank())
es <- ggplot() + geom_density(data = pgls100, aes(x = Estimate), trim = TRUE) + geom_rug(data = pgls100,
aes(x = Estimate)) + geom_text(data = pglssumMCC, aes(x = `mean(Estimate)`, y = 1.5,
label = estimate), size = 3, vjust = 0) + geom_text(data = pglssumMCC, aes(x = `mean(Estimate)`,
y = 0.4, label = pvalue), size = 3, vjust = 0) + facet_grid(pgls ~ name, labeller = label_parsed) +
theme_bw() + labs(y = " ", x = "PGLS estimates across 100 posterior trees") +
theme(strip.text.x = element_blank(), strip.text.y = element_text(size = 12),
panel.spacing.x = unit(0, "lines"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
xlim(-0.48, -0.19)
q8 = ggarrange(ss, NULL, ps, NULL, es, ncol = 1, heights = c(1, -0.15, 1, -0.03,
1), align = "hv", labels = c("A", "", "B", "", "C"))
q8ggsave(paste0(fig_path, "FigS9.pdf"), q8, width = 12, height = 8, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS9.png"), q8, width = 12, height = 8)Fig. S9. Genetic diversity (\(\theta_{T}\)) estimated per codon site position (A), linear regression with speciation rates (B) and PGLS estimates across 100 posterior trees (all p-values < 0.01) as well as MCC estimate and respective p-value. Data set with at least 10 individuals (1095 species) to allow for more polymorphisms across all codon sites.
pirangeEst <- gen.div0 %>%
select(species, EstPi, subPi_mean, subPi_min, subPi_max, outPi, subPi_lower.CI,
subPi_upper.CI) %>%
arrange(subPi_mean) %>%
pivot_longer(cols = EstPi:subPi_mean) %>%
ggplot() + geom_point(aes(y = fct_inorder(species), x = value, alpha = fct_infreq(outPi),
color = fct_infreq(outPi), shape = name, size = fct_infreq(outPi))) + theme_bw() +
scale_x_log10() + scale_color_manual(values = c("gray20", "#71D692", "#805C31"),
labels = c("Included", "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
theta["T"]))) + scale_alpha_manual(values = c(0.1, 0.4, 1), labels = c("Included",
"With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
theta["T"]))) + scale_size_manual(values = c(0.2, 0.4, 1.5), labels = c("Included",
"With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
theta["T"]))) + scale_shape_manual(values = c(1, 2), labels = c(expression(paste("Original ",
theta["T"])), expression(paste("Mean subsampled ", theta["T"]))), name = "") +
theme(legend.title = element_text(color = col_v1[95]), axis.title.y = element_blank(),
axis.text.y = element_blank(), axis.ticks.y = element_blank()) + guides(color = guide_legend(order = 1),
size = guide_legend(order = 1), alpha = guide_legend(order = 1), shape = guide_legend(order = 2)) +
labs(x = "")
thetarangeEst <- gen.div0 %>%
select(species, EstTheta, subTheta_mean, subTheta_min, subTheta_max, outTheta,
subTheta_lower.CI, subTheta_upper.CI) %>%
arrange(subTheta_mean) %>%
pivot_longer(cols = EstTheta:subTheta_mean) %>%
ggplot() + geom_point(aes(y = fct_inorder(species), x = value, alpha = fct_infreq(outTheta),
color = fct_infreq(outTheta), shape = name, size = fct_infreq(outTheta))) + theme_bw() +
scale_x_log10() + scale_color_manual(values = c("gray20", "#71D692", "#805C31"),
labels = c("Included", "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
theta["W"]))) + scale_alpha_manual(values = c(0.1, 0.4, 1), labels = c("Included",
"With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
theta["W"]))) + scale_size_manual(values = c(0.2, 0.4, 1.5), labels = c("Included",
"With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
theta["W"]))) + scale_shape_manual(values = c(1, 2), labels = c(expression(paste("Original ",
theta["W"])), expression(paste("Mean subsampled ", theta["W"]))), name = "") +
theme(legend.title = element_text(color = col_v2[90]), axis.title.y = element_blank(),
axis.text.y = element_blank(), axis.ticks.y = element_blank()) + guides(color = guide_legend(order = 1),
size = guide_legend(order = 1), alpha = guide_legend(order = 1), shape = guide_legend(order = 2)) +
labs(x = "Genetic diversity (logged)")
ggarrange(pirangeEst, thetarangeEst, ncol = 1, labels = "auto")Genetic diversity is plotted for all species ordered by mean subsampled, showing how many species were excluded because the original estimates done with all samples was outside the range of subsampling genetic diversity 1000 times to 5 individuals.
excP <- gen.div0 %>%
filter(outPi %in% "out") %>%
select(species, EstPi, subPi_mean, subPi_min, subPi_max) %>%
pivot_longer(cols = EstPi:subPi_mean) %>%
filter(value > 0) %>%
ggplot() + geom_pointrange(aes(y = species, x = value, xmin = subPi_min, xmax = subPi_max,
shape = name), color = col_v1[95]) + theme_bw() + scale_x_log10() + scale_shape_manual(values = c(17,
19), labels = c("Original", "Mean subsampled"), name = "") + labs(x = "", y = expression(paste("Excluded species - ",
theta["T"]))) + theme(legend.position = "top") + guides(shape = guide_legend(override.aes = list(color = "black")))
excT <- gen.div0 %>%
filter(outTheta %in% "out") %>%
select(species, EstTheta, subTheta_mean, subTheta_min, subTheta_max) %>%
pivot_longer(cols = EstTheta:subTheta_mean) %>%
filter(value > 0) %>%
ggplot() + geom_pointrange(aes(y = fct_rev(species), x = value, xmin = subTheta_min,
xmax = subTheta_max, shape = name), color = col_v2[90], show.legend = F) + theme_bw() +
scale_x_log10() + scale_shape_manual(values = c(17, 19), labels = c("Original",
"Mean subsampled"), name = "") + labs(x = "Genetic diversity (logged)", y = expression(paste("Excluded species - ",
theta["W"])))
ggarrange(excP, excT, ncol = 1, heights = c(0.18, 1))Plotting which species were excluded with respective original and mean subsampled given each species min and max range of subsampled values of genetic diversity.
mparClaDS <- parClaDS %>%
mutate(m = alpha * exp((sigma^2)/2)) %>%
relocate(m, .before = epsilon) %>%
pivot_longer(., cols = sigma:epsilon)
hp <- ggplot() + geom_violin(data = filter(mparClaDS, !set %in% "treeMCC"), aes(x = name,
y = value)) + geom_point(data = filter(mparClaDS, !set %in% "treeMCC"), aes(x = name,
y = value), size = 0.5, position = "jitter", col = "gray60") + stat_summary(data = filter(mparClaDS,
!set %in% "treeMCC"), aes(x = name, y = value), fun = "mean", geom = "point",
size = 2) + geom_point(data = filter(mparClaDS, set %in% "treeMCC"), aes(x = name,
y = value), color = "red", size = 3, shape = 17) + facet_wrap(~name, scale = "free",
ncol = 4, labeller = label_parsed) + theme_bw() + theme(axis.title = element_blank(),
axis.ticks.x = element_blank(), axis.text.x = element_blank(), panel.grid = element_blank())
hpggsave(paste0(fig_path, "FigS2.pdf"), hp, width = 5, height = 3, useDingbats = FALSE)Fig. SXX. ClaDS hyper-parameters \(\alpha\), \(\epsilon\), \(\sigma\) and m = \(\alpha\) × exp(\(\sigma^{2}\) /2) from 100 posterior trees (mean as black circles) and MCC tree (red triangles).
DR <- read.delim(paste0(folder_path, "/speciation_rate/upham_tipRates_DR_BAMM/DR-SUMMARY_MamPhy_BDvr_Completed_5911sp_topoCons_NDexp_all10k_v2_expanded.txt"),
header = T, sep = " ") %>%
rownames_to_column(var = "species") %>%
mutate(species = word(species, 1, 2, sep = "_")) %>%
select(species, means) %>%
rename(DRrate_mean = means)
BAMM <- read.delim(paste0(folder_path, "/speciation_rate/upham_tipRates_DR_BAMM/globalMeanTipRates_sample10_tree1-10_NDexp_wNetDiv.txt"),
header = T, sep = " ") %>%
rownames_to_column(var = "species") %>%
mutate(species = word(species, 1, 2, sep = "_")) %>%
select(species, arithMeans_Lam)
ratesC1 <- spRate %>%
filter(!set %in% "treeMCC") %>%
group_by(species) %>%
dplyr::summarise(ClaDSrate_mean = mean(tipRate, na.rm = TRUE), clades = unique(clades)) %>%
left_join(., DR, by = "species") %>%
left_join(., BAMM, by = "species") %>%
pivot_longer(cols = DRrate_mean:arithMeans_Lam) %>%
ggplot(aes(x = value, y = ClaDSrate_mean, color = name)) + geom_point(alpha = 0.3,
size = 0.9) + geom_smooth(method = "lm") + theme_bw() + labs(y = "Mean tip speciation rates (100 trees)",
x = "Upham rates", color = "") + scale_x_log10() + scale_y_log10()
ratesC2 <- spRate %>%
filter(set %in% "treeMCC") %>%
left_join(., DR, by = "species") %>%
left_join(., BAMM, by = "species") %>%
pivot_longer(cols = DRrate_mean:arithMeans_Lam) %>%
ggplot(aes(x = value, y = tipRate, color = name)) + geom_point(alpha = 0.3, size = 0.9) +
geom_smooth(method = "lm") + theme_bw() + labs(y = "MCC tip speciation rates",
x = "Upham rates", color = "") + scale_x_log10() + scale_y_log10()
ggarrange(ratesC1, ratesC2, ncol = 2, common.legend = T)gendivSpRate3 <- gendivSpRate %>%
left_join(., DR, by = "species") %>%
left_join(., BAMM, by = "species") %>%
filter(set %in% "treeMCC") %>%
select(species, EstPi, DRrate_mean, arithMeans_Lam, tipRate) %>%
drop_na()
# TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# gendivSpRate3$species)])) datacomp <- comparative.data(data = gendivSpRate3,
# phy = TreeSubset, names.col = 'species', vcv = TRUE, na.omit = TRUE,
# warn.dropped = TRUE) modDR <- pgls(log(EstPi) ~ log(DRrate_mean), data =
# datacomp, lambda = 'ML') saveRDS(modDR,
# paste0(folder_path,'pgls/extra/pgls_piDR.rds'))
modDR <- readRDS(paste0(folder_path, "pgls/extra/pgls_piDR.rds"))
summary(modDR)##
## Call:
## pgls(formula = log(EstPi) ~ log(DRrate_mean), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38899 -0.06502 0.00120 0.06664 0.29202
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.522
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.397, 0.632)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.777975 0.589072 -8.1110 8.882e-16 ***
## log(DRrate_mean) -0.303231 0.054457 -5.5683 2.945e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1002 on 1874 degrees of freedom
## Multiple R-squared: 0.01628, Adjusted R-squared: 0.01575
## F-statistic: 31.01 on 1 and 1874 DF, p-value: 2.945e-08
summary(lm(log(tipRate) ~ log(DRrate_mean), data = gendivSpRate3))##
## Call:
## lm(formula = log(tipRate) ~ log(DRrate_mean), data = gendivSpRate3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8161 -0.2113 -0.0066 0.1898 0.9640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.29982 0.01968 -15.23 <2e-16 ***
## log(DRrate_mean) 0.74552 0.01186 62.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2876 on 1874 degrees of freedom
## Multiple R-squared: 0.6783, Adjusted R-squared: 0.6781
## F-statistic: 3952 on 1 and 1874 DF, p-value: < 2.2e-16
# modBAMM <- pgls(log(EstPi) ~ log(arithMeans_Lam), data = datacomp, lambda =
# 'ML') saveRDS(modBAMM, paste0(folder_path,'pgls/extra/pgls_piBAMM.rds'))
modBAMM <- readRDS(paste0(folder_path, "pgls/extra/pgls_piBAMM.rds"))
summary(modBAMM)##
## Call:
## pgls(formula = log(EstPi) ~ log(arithMeans_Lam), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32230 -0.06630 0.00367 0.07095 0.35680
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.524
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.401, 0.633)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.35530 0.63434 -8.4423 < 2.2e-16 ***
## log(arithMeans_Lam) -0.73664 0.14576 -5.0537 4.754e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1006 on 1874 degrees of freedom
## Multiple R-squared: 0.01345, Adjusted R-squared: 0.01292
## F-statistic: 25.54 on 1 and 1874 DF, p-value: 4.754e-07
### try to get pgls of spRate ~ pi but doesn't work
MCCgendivSpRate <- gendivSpRate %>%
filter(set %in% "treeMCC", !is.na(EstPi))
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
MCCgendivSpRate$species)]))
datacomp <- comparative.data(data = MCCgendivSpRate, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
modspRatePi <- pgls(log(tipRate) ~ log(EstPi), data = datacomp, lambda = "ML")
saveRDS(modspRatePi, paste0(folder_path, "pgls/extra/pgls_spRatePi.rds"))
spRatePi <- readRDS(paste0(folder_path, "pgls/extra/pgls_spRatePi.rds"))
summary(modspRatePi)
summary(lm(data = MCCgendivSpRate, log(tipRate) ~ log(EstPi))) ## still suggest correlation even if not able to account for the phylogeny
dtset <- datacomp$data %>%
rownames_to_column("species")
fit1 <- gls(log(tipRate) ~ log(EstPi), data = dtset, correlation = corPagel(1, phy = datacomp$phy,
form = ~species), method = "ML")
summary(fit1)Using a multivariate analysis of variance (MANOVA):
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))
GendivSpRate10 <- readRDS(paste0(folder_path, "genetic_diversity/GendivSpRate10.rds")) %>%
filter(set %in% "treeMCC")
MCCtree <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(MCCtree, as.character(MCCtree$tip.label[which(!MCCtree$tip.label %in%
GendivSpRate10$species)]))
## install_github('JClavel/mvMORPH', ref='devel_1.1.5')
library(mvMORPH)
dtset <- cbind(GendivSpRate10$EstPi_1, GendivSpRate10$EstPi_2, GendivSpRate10$EstPi_3)
rownames(dtset) <- GendivSpRate10$species
dtset <- list(genDiv = log(dtset), spRate = log(GendivSpRate10$tipRate))
fit_mult <- mvgls(genDiv ~ spRate, data = dtset, tree = TreeSubset, model = "lambda",
method = "LL")
fit_mult##
## Call:
## mvgls(formula = genDiv ~ spRate, data = dtset, tree = TreeSubset,
## model = "lambda", method = "LL")
##
##
## Generalized least squares fit by REML
## Log-restricted-likelihood: -3652.189
##
##
## Parameter estimate(s):
## lambda: 0.3435
##
##
## Covariance matrix of size: 3 by 3
## for 1095 observations
##
## Coefficients:
## [,1] [,2] [,3]
## (Intercept) -5.4512 -6.6377 -3.9131
## spRate -0.4286 -0.2445 -0.4060
manova.gls(fit_mult, nperm = 999)## Sequential MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## spRate 1 0.03493 13.16 3 1091 1.91e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
manFit <- manova.gls(fit_mult, nperm = 999) ### significant
effectsize(manFit) # you can also have effect-size the equivalent of## ## Multivariate measure(s) of association ##
## spRate
## ξ^2 [Pillai] 0.03493
# R2 for multivariate using the effectsize() function on the manova.gls object
# in the devel version of mvMORPH.Pi1 vs Pi2
# test for differences between the responses using tests for 'repeated
# measures' (just a way to compare div1,2... by considering them like if they
# were different measures of the same thing): P=matrix(c(0,1,-1,0) ##
# '1*diversity_1 - 1*diversity_2 + 0*diversity_3 = 0'
P1 = matrix(c(1, -1, 0), nrow = 3) #'diversity_1' to 'diversity_2'
manova.gls(fit_mult, P = P1, L = c(0, 1), nperm = 999)## General Linear Hypothesis Test (repeated measures design): Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Contrasts L 1 0.006649 7.317 1 1093 0.006939 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pi2 vs Pi3
P2 = matrix(c(0, 1, -1), nrow = 3) # 'diversity_2' to 'diversity_3'
manova.gls(fit_mult, P = P2, L = c(0, 1), nperm = 999)## General Linear Hypothesis Test (repeated measures design): Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Contrasts L 1 0.00448 4.919 1 1093 0.02677 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pi1 vs Pi3
P3 = matrix(c(1, 0, -1), nrow = 3) # 'diversity_1' to 'diversity_3'
manova.gls(fit_mult, P = P3, L = c(0, 1), nperm = 999)## General Linear Hypothesis Test (repeated measures design): Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Contrasts L 1 0.0002665 0.2913 1 1093 0.5895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### PGLS
pglsResulttraits2 <- pglsResulttraits %>%
mutate(size = case_when(set %in% "treeMCC" ~ "MCC", TRUE ~ "Posterior")) %>%
arrange(desc(size))
ppgls2 <- ggplot(pglsResulttraits2) + geom_point(aes(y = Estimate, x = Term, group = set,
size = size, colour = Term, shape = fct_rev(pvalueBin)), alpha = 0.4, stroke = 0.2,
position = position_dodge(width = 0.9)) + geom_hline(yintercept = 0, size = 0.2) +
theme_bw() + facet_wrap(~analysis, nrow = 1, drop = T, scales = "free_x", labeller = label_parsed) +
scale_shape_manual(name = "PGLS sig.", values = c(19, 1), labels = c("< 0.05",
"> 0.05")) + scale_colour_manual(name = "Models terms", values = iwanthue(6),
guide = guide_legend(override.aes = list(alpha = 1), nrow = 3)) + scale_size_manual(name = "Tree set",
labels = c("MCC tree", "Posterior trees"), values = c(3.5, 1)) + theme(legend.position = "top",
plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.spacing.y = unit(0.1, "cm"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.x = element_blank(),
axis.text.x = element_blank()) + labs(y = "PGLS estimates", x = "") + guides(size = guide_legend(nrow = 3,
byrow = TRUE), shape = guide_legend(nrow = 3, byrow = TRUE))
### BMLM
BMLMglobal_traitssum2 <- BMLMglobal_traitsExtPos %>%
group_by(Term, analysis, set) %>%
median_qi(Estimate)
pbmlm2 <- ggplot() + geom_pointinterval(data = filter(BMLMglobal_traitssum2, set %in%
"treeMCC"), aes(y = Estimate, x = Term, ymin = .lower, ymax = .upper, group = set,
color = Term), alpha = 0.7, point_alpha = 0.5, point_size = 3, size = 3, position = position_dodge(width = 0.9)) +
geom_pointinterval(data = filter(BMLMglobal_traitssum2, !set %in% "treeMCC"),
aes(y = Estimate, x = Term, ymin = .lower, ymax = .upper, group = set, color = Term),
alpha = 0.15, point_alpha = 0.07, point_size = 1, size = 0.5, position = position_jitterdodge(jitter.width = 0.7,
dodge.width = 0.9)) + facet_wrap(~analysis, nrow = 1, drop = T, scales = "free_x",
labeller = label_parsed) + geom_hline(yintercept = 0, size = 0.2) + scale_color_manual(name = "Models terms",
values = iwanthue(6)) + theme_bw() + theme(legend.position = "bottom", strip.background = element_blank(),
strip.text.x = element_blank(), plot.margin = unit(c(-0.5, 0, 0, 0), "cm"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_blank()) +
# axis.text.x = element_text(angle = 30, hjust = 1 )) +
labs(y = "BMLM estimates", x = "")
qs42 = ggpubr::ggarrange(ppgls2, pbmlm2, ncol = 1, nrow = 2, common.legend = T, legend = "bottom",
align = "v", heights = c(1, 0.9))
qs42ggsave(paste0(fig_path, "FigS6_2.pdf"), qs42, width = 8, height = 6, useDingbats = FALSE)Fig. S7. Results for the three models to test the effect of including life-history traits (body mass, species range area, mean annual temperature averaged across species range, litter size and generation length) in the model with genetic diversity and speciation rate. The model with speciation rate as predictor suggest the association between genetic diversity and speciation rate is not due to traits affecting both variables. Analyses done for set of 100 posterior trees and MCC tree, showing estimates significance for PGLS analyses (p-value < 0.05) and 95% confidence intervals around median estimates for BMLM analyses.
## mutation rate vs speciation rate
mgsubendivSpRateMutRate <- gendivSpRateMutRate %>%
filter(!set %in% "treeMCC") %>%
group_by(species) %>%
dplyr::summarise(SpRate_mean = mean(tipRate, na.rm = TRUE), SpRate_lower.ci = CI(tipRate,
ci = 0.95)[1], SpRate_upper.ci = CI(tipRate, ci = 0.95)[3], MutRate_mean = mean(mutRate,
na.rm = TRUE), MutRate_lower.ci = CI(mutRate, ci = 0.95)[1], MutRate_upper.ci = CI(mutRate,
ci = 0.95)[3], Ne_mean = mean(Ne, na.rm = TRUE), Ne_lower.ci = CI(Ne, ci = 0.95)[1],
Ne_upper.ci = CI(Ne, ci = 0.95)[3]) %>%
drop_na() %>%
left_join(., clades, by = "species") %>%
filter(clades %in% names(tcol[2:15])) %>%
left_join(., gen.div[, c("species", "EstPi", "subPi_mean", "subPi_upper.CI",
"subPi_lower.CI")], by = "species")library(GGally)
mmdata <- mgsubendivSpRateMutRate %>%
select(species, clades, SpRate_mean, MutRate_mean, Ne_mean, EstPi) %>%
left_join(., traitData[-7], by = "species") %>%
pivot_longer(cols = SpRate_mean:BodyMassKg_notInputed)
mdata <- pivot_wider(mmdata, names_from = name, values_from = value) %>%
select(-mean_temp)
cors <- ggpairs(mdata, columns = 3:9, upper = list(continuous = wrap("smooth", alpha = 0.3,
size = 0.1, se = F)), diag = "blank", lower = "blank", mapping = aes(color = clades),
legend = 2) + scale_y_log10() + scale_x_log10() + scale_color_manual(values = tcol[2:15]) +
theme_bw() + theme(legend.position = "bottom")
corsp0 = ggplot(mgsubendivSpRateMutRate, aes(y = subPi_mean, x = SpRate_mean)) + geom_errorbarh(aes(color = clades,
xmin = SpRate_lower.ci, xmax = SpRate_upper.ci)) + geom_errorbar(aes(color = clades,
ymin = subPi_lower.CI, ymax = subPi_upper.CI)) + geom_point(aes(color = clades),
size = 0.9) + geom_smooth(method = lm, position = "identity") + scale_x_log10(breaks = breaks_log(n = 8),
labels = label_number()) + scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
theme_bw() + labs(x = expression(paste("Speciation rate - ", lambda)), y = expression(paste("Genetic diversity - ",
theta["T"], " Subsampled to 5 individuals"))) + scale_colour_manual(values = tcol[-15]) +
theme(legend.position = "none")
p1 = ggplot(mgsubendivSpRateMutRate, aes(y = subPi_mean, x = MutRate_mean)) + geom_errorbarh(aes(color = clades,
xmin = MutRate_lower.ci, xmax = MutRate_upper.ci)) + geom_errorbar(aes(color = clades,
ymin = subPi_lower.CI, ymax = subPi_upper.CI)) + geom_point(aes(color = clades),
size = 0.9) + geom_smooth(method = lm, position = "identity") + scale_x_log10(breaks = breaks_log(n = 8),
labels = label_scientific()) + scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
theme_bw() + labs(x = expression(paste("Mutation rate - ", mu)), y = expression(paste("Genetic diversity - ",
theta["T"], " Subsampled to 5 individuals"))) + scale_colour_manual(values = tcol[-15]) +
theme(legend.position = "none")
p2 = ggplot(mgsubendivSpRateMutRate, aes(y = subPi_mean, x = Ne_mean)) + geom_errorbarh(aes(color = clades,
xmin = Ne_lower.ci, xmax = Ne_upper.ci)) + geom_errorbar(aes(color = clades,
ymin = subPi_lower.CI, ymax = subPi_upper.CI)) + geom_point(aes(color = clades),
size = 0.9) + geom_smooth(method = lm, position = "identity") + scale_x_log10(breaks = breaks_log(n = 8),
labels = label_scientific()) + scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
theme_bw() + labs(x = expression(paste(N["e"])), y = expression(paste("Genetic diversity - ",
theta["T"], " Subsampled to 5 individuals"))) + scale_colour_manual(values = tcol[-15]) +
theme(legend.position = "none")
q <- ggarrange(p0, p1, p2, ncol = 3)
mmdata <- mgsubendivSpRateMutRate %>%
select(species, clades, SpRate_mean, MutRate_mean, Ne_mean, EstPi) %>%
pivot_longer(cols = SpRate_mean:EstPi)
q2 <- ggplot(mmdata, aes(y = value, color = clades, x = clades)) + geom_boxplot(show.legend = FALSE) +
facet_wrap(~name, nrow = 4, ncol = 1, scales = "free_y", strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + theme(legend.position = "none")
pq <- ggarrange(q, q2, nrow = 2, heights = c(0.5, 1))
pqmmdata <- mgsubendivSpRateMutRate %>%
select(species, clades, SpRate_mean, MutRate_mean, Ne_mean, EstPi) %>%
left_join(., traitData[-7], by = "species") %>%
pivot_longer(cols = SpRate_mean:BodyMassKg_notInputed)
s1 <- ggplot(filter(mmdata, name %in% "EstPi"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = expression(paste(theta["T"]))) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text.y = element_blank())
s2 <- ggplot(filter(mmdata, name %in% "MutRate_mean"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = expression(mu)) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text.y = element_blank())
s3 <- ggplot(filter(mmdata, name %in% "Ne_mean"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = expression(paste(N["e"]))) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text.y = element_blank())
s4 <- ggplot(filter(mmdata, name %in% "SpRate_mean"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = expression(lambda)) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
# strip.background = element_blank(), strip.text.y = element_blank() )
s5 <- ggplot(filter(mmdata, name %in% "BodyMassKg_notInputed"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = "Body mass (kg)") + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text.y = element_blank())
s6 <- ggplot(filter(mmdata, name %in% "geoArea_km2"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = "Range area (km2)") + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text.y = element_blank())
s7 <- ggplot(filter(mmdata, name %in% "GenerationLength_d"), aes(y = value, color = clades)) +
geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
x = "Generation length (d)") + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text.y = element_blank())
ggarrange(s1, s2, s3, s7, s5, s6, s4, ncol = 7)usedTraitDataset <- gendivSpRateTrait %>%
filter(set %in% 'treeMCC') %>%
select(-DispDist_notInputed) %>%
drop_na()
names <- c(BodyMassKg_notInputed = "Body mass",
geoArea_km2 = "Range area",
mean_temp = "Mean range temperature",
litter_or_clutch_size_n = "Litter size",
GenerationLength_d = "Generation length")
divTraits <- usedTraitDataset %>%
select(species, EstPi, BodyMassKg_notInputed, geoArea_km2, mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
pivot_longer(cols=BodyMassKg_notInputed:GenerationLength_d) %>%
ggplot(aes(y = EstPi, x = value)) +
geom_point(size = 0.5, alpha = 0.4) +
geom_smooth(method='lm', se = F) +
facet_wrap(~name, scale = "free", ncol = 1,
labeller = labeller(name = names),
strip.position = "bottom") +
labs(x="", y = expression(paste("Genetic diversity " (theta['T']))),
title = "Genetic diversity") +
scale_y_log10() +
scale_x_log10() +
theme_bw() +
theme(#aspect.ratio = 1,
strip.background = element_blank(),
strip.placement = "outside")
spRateTraits <- usedTraitDataset %>%
select(species, tipRate, BodyMassKg_notInputed, geoArea_km2, mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
pivot_longer(cols=BodyMassKg_notInputed:GenerationLength_d) %>%
ggplot(aes(y = tipRate, x = value)) +
geom_point(size = 0.5, alpha = 0.4) +
geom_smooth(method='lm', se = F) +
facet_wrap(~name, scale = "free", ncol = 1,
labeller = labeller(name = names),
strip.position = "bottom") +
labs(x="", y = "MCC tree tip speciation rates",
title = "Speciation rates") +
scale_y_log10() +
scale_x_log10() +
theme_bw() +
theme(#aspect.ratio = 1,
strip.background = element_blank(),
strip.placement = "outside")
Traits <- usedTraitDataset %>%
select(species, BodyMassKg_notInputed, geoArea_km2, mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
pivot_longer(cols=BodyMassKg_notInputed:GenerationLength_d) %>%
ggplot(aes(x = value)) +
geom_histogram() +
facet_wrap(~name, scale = "free", ncol = 1,
labeller = labeller(name = names),
strip.position = "bottom") +
labs(x = "", y = "Counts") +
scale_x_log10() +
theme_bw() +
theme(panel.grid = element_blank(),
#aspect.ratio = 1,
strip.background = element_blank(),
strip.placement = "outside")
tds <- ggarrange(Traits, divTraits, spRateTraits, ncol = 3, align = 'hv')
tdsggsave(paste0(fig_path, 'other/FigSXX_traitsPlotting.pdf'), tds, width = 7, height = 10, useDingbats = FALSE )Fig. SXX. Dataset used in analyses with life-history traits (1004 spp.). Histogram for each trait with x-axis log scaled (A). Linear relationship between genetic diversity (B) and tip speciation rates (from MCC tree; C) with each used trait, both axis are log scaled.
# http://www.mpcm-evolution.com/practice/online-practical-material-chapter-6/chapter-6-1exercises-testing-assumptions-statistical-issues-framework-phylogenetic-generalized-least-squares
library(car)
### prepare data to be the same as used for PGLS analyses
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata")) #loads TreeSet
DataSubsetMCC <- gendivSpRateTrait %>%
filter(set %in% "treeMCC") %>%
select(species, clades, tipRate, EstPi, BodyMassKg_notInputed, geoArea_km2, mean_temp,
litter_or_clutch_size_n, GenerationLength_d) %>%
drop_na()
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubsetMCC$species)]))
datacomp <- comparative.data(data = DataSubsetMCC, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
lm.res = lm(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed) + log(geoArea_km2) +
log(mean_temp) + log(litter_or_clutch_size_n) + log(GenerationLength_d), data = DataSubsetMCC)
vif(mod = lm.res)## log(tipRate) log(BodyMassKg_notInputed)
## 1.063279 1.704704
## log(geoArea_km2) log(mean_temp)
## 1.053977 1.164662
## log(litter_or_clutch_size_n) log(GenerationLength_d)
## 2.102306 3.063934
Using variance inflation factors we can verify if there is a big problem with multicollinearity for the linear model (not sure if there is a way to verify this for a PGLS). Since all values are below 10, which normally values above is commonly considered indicative of a problem, it seems unlikely the collinearity between these variables would be a problem.
par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1749 -0.6113 0.1553 0.6946 1.9319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.10449 0.03443 -119.22 < 2e-16 ***
## log(BodyMassKg_notInputed) -0.08152 0.01041 -7.83 1.24e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9647 on 1002 degrees of freedom
## Multiple R-squared: 0.05766, Adjusted R-squared: 0.05672
## F-statistic: 61.31 on 1 and 1002 DF, p-value: 1.238e-14
# modpiBM <- pgls(log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp,
# lambda = 'ML') saveRDS(modpiBM,
# paste0(folder_path,'pgls/extra/pglspiBM.rds'))
modpiBM <- readRDS(paste0(folder_path, "pgls/extra/pglspiBM.rds"))
summary(modpiBM)##
## Call:
## pgls(formula = log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30828 -0.06564 -0.00734 0.05576 0.31807
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.568
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.410, 0.696)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.01495 0.56474 -7.1094 2.212e-12 ***
## log(BodyMassKg_notInputed) -0.12415 0.02292 -5.4168 7.598e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09526 on 1002 degrees of freedom
## Multiple R-squared: 0.02845, Adjusted R-squared: 0.02748
## F-statistic: 29.34 on 1 and 1002 DF, p-value: 7.598e-08
plot(modpiBM)summary(lm(log(tipRate) ~ log(BodyMassKg_notInputed), data = datacomp$data))##
## Call:
## lm(formula = log(tipRate) ~ log(BodyMassKg_notInputed), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89281 -0.27891 -0.01674 0.29959 1.58359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.468066 0.017924 -81.904 <2e-16 ***
## log(BodyMassKg_notInputed) 0.011841 0.005421 2.184 0.0292 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5022 on 1002 degrees of freedom
## Multiple R-squared: 0.004739, Adjusted R-squared: 0.003746
## F-statistic: 4.771 on 1 and 1002 DF, p-value: 0.02917
# modspRateBM <- pgls(log(tipRate) ~ log(BodyMassKg_notInputed), data =
# datacomp, lambda = 'ML') saveRDS(modspRateBM,
# paste0(folder_path,'pgls/extra/pglsspRateBM.rds'))
modspRateBM <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateBM.rds"))
summary(modspRateBM)##
## Call:
## pgls(formula = log(tipRate) ~ log(BodyMassKg_notInputed), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27134 -0.04183 -0.00030 0.03999 0.25566
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.993
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.991, 0.996)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.5229203 0.4957941 -5.0886 4.305e-07 ***
## log(BodyMassKg_notInputed) 0.0041917 0.0088750 0.4723 0.6368
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06804 on 1002 degrees of freedom
## Multiple R-squared: 0.0002226, Adjusted R-squared: -0.0007752
## F-statistic: 0.2231 on 1 and 1002 DF, p-value: 0.6368
plot(modspRateBM)summary(lm(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed),
## data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7804 -0.5955 0.1438 0.6774 2.0556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.63779 0.09383 -49.429 < 2e-16 ***
## log(tipRate) -0.36327 0.05962 -6.094 1.57e-09 ***
## log(BodyMassKg_notInputed) -0.07722 0.01025 -7.531 1.12e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9478 on 1001 degrees of freedom
## Multiple R-squared: 0.09136, Adjusted R-squared: 0.08955
## F-statistic: 50.32 on 2 and 1001 DF, p-value: < 2.2e-16
# modpirateBM <- pgls(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed),
# data = datacomp, lambda = 'ML') saveRDS(modpirateBM,
# paste0(folder_path,'pgls/extra/pglspirateBM.rds'))
modpirateBM <- readRDS(paste0(folder_path, "pgls/extra/pglspirateBM.rds"))
summary(modpirateBM)##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25237 -0.05717 0.00122 0.06204 0.29693
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.474
## lower bound : 0.000, p = 9.9143e-14
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.297, 0.629)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.842380 0.509536 -9.5035 < 2.2e-16 ***
## log(tipRate) -0.381034 0.078052 -4.8818 1.223e-06 ***
## log(BodyMassKg_notInputed) -0.118374 0.021318 -5.5528 3.600e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08714 on 1001 degrees of freedom
## Multiple R-squared: 0.05154, Adjusted R-squared: 0.04964
## F-statistic: 27.2 on 2 and 1001 DF, p-value: 3.153e-12
plot(modpirateBM)par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(geoArea_km2), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(geoArea_km2), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8492 -0.6255 0.1365 0.7316 1.9273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.06969 0.20325 -24.943 < 2e-16 ***
## log(geoArea_km2) 0.07857 0.01447 5.431 7.02e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9795 on 1002 degrees of freedom
## Multiple R-squared: 0.0286, Adjusted R-squared: 0.02763
## F-statistic: 29.5 on 1 and 1002 DF, p-value: 7.023e-08
# modpiArea <- pgls(log(EstPi) ~ log(geoArea_km2), data = datacomp, lambda =
# 'ML') saveRDS(modpiArea, paste0(folder_path,'pgls/extra/pglspiArea.rds'))
modpiArea <- readRDS(paste0(folder_path, "pgls/extra/pglspiArea.rds"))
summary(modpiArea) ### positivelly correlated##
## Call:
## pgls(formula = log(EstPi) ~ log(geoArea_km2), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.301173 -0.061473 -0.000233 0.067502 0.255721
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.618
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.476, 0.730)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.643268 0.633533 -8.9076 < 2.2e-16 ***
## log(geoArea_km2) 0.123812 0.014847 8.3390 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09801 on 1002 degrees of freedom
## Multiple R-squared: 0.0649, Adjusted R-squared: 0.06396
## F-statistic: 69.54 on 1 and 1002 DF, p-value: 2.454e-16
plot(modpiArea)summary(lm(log(tipRate) ~ log(geoArea_km2), data = datacomp$data))##
## Call:
## lm(formula = log(tipRate) ~ log(geoArea_km2), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.87475 -0.28650 -0.00506 0.30714 1.48899
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.079662 0.103657 -10.42 < 2e-16 ***
## log(geoArea_km2) -0.029287 0.007378 -3.97 7.71e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4995 on 1002 degrees of freedom
## Multiple R-squared: 0.01548, Adjusted R-squared: 0.0145
## F-statistic: 15.76 on 1 and 1002 DF, p-value: 7.714e-05
# modspRateArea <- pgls(log(tipRate) ~ log(geoArea_km2), data = datacomp,
# lambda = 'ML') saveRDS(modspRateArea,
# paste0(folder_path,'pgls/extra/pglsspRateArea.rds'))
modspRateArea <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateArea.rds"))
summary(modspRateArea) ### not correlated##
## Call:
## pgls(formula = log(tipRate) ~ log(geoArea_km2), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.270816 -0.045761 -0.006045 0.035765 0.240372
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.993
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.990, 0.996)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4666147 0.4963273 -4.9697 7.881e-07 ***
## log(geoArea_km2) -0.0042257 0.0028760 -1.4693 0.1421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0679 on 1002 degrees of freedom
## Multiple R-squared: 0.00215, Adjusted R-squared: 0.001154
## F-statistic: 2.159 on 1 and 1002 DF, p-value: 0.1421
plot(modspRateArea)summary(lm(log(EstPi) ~ log(tipRate) + log(geoArea_km2), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(geoArea_km2), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6110 -0.6042 0.1416 0.7106 1.9175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.45641 0.21048 -25.924 < 2e-16 ***
## log(tipRate) -0.35819 0.06093 -5.878 5.64e-09 ***
## log(geoArea_km2) 0.06808 0.01434 4.747 2.36e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9635 on 1001 degrees of freedom
## Multiple R-squared: 0.06101, Adjusted R-squared: 0.05914
## F-statistic: 32.52 on 2 and 1001 DF, p-value: 2.07e-14
# modpirateArea <- pgls(log(EstPi) ~ log(tipRate) + log(geoArea_km2), data =
# datacomp, lambda = 'ML') saveRDS(modpirateArea,
# paste0(folder_path,'pgls/extra/pglspirateArea.rds'))
modpirateArea <- readRDS(paste0(folder_path, "pgls/extra/pglspirateArea.rds"))
summary(modpirateArea) ### as expected, spRate negatively correlated and geographic range positively##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(geoArea_km2),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.284495 -0.064448 -0.006408 0.054472 0.283588
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.563
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.406, 0.693)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.245061 0.603871 -10.3417 < 2.2e-16 ***
## log(tipRate) -0.308851 0.081035 -3.8113 0.0001467 ***
## log(geoArea_km2) 0.117460 0.014842 7.9141 6.661e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09243 on 1001 degrees of freedom
## Multiple R-squared: 0.07753, Adjusted R-squared: 0.07568
## F-statistic: 42.06 on 2 and 1001 DF, p-value: < 2.2e-16
plot(modpirateArea)par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(mean_temp), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(mean_temp), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1106 -0.6373 0.1544 0.7356 2.3511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.16377 0.46104 -13.37 < 2e-16 ***
## log(mean_temp) 0.37988 0.07997 4.75 2.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9828 on 1002 degrees of freedom
## Multiple R-squared: 0.02203, Adjusted R-squared: 0.02105
## F-statistic: 22.57 on 1 and 1002 DF, p-value: 2.326e-06
# modpiTemp <- pgls(log(EstPi) ~ log(mean_temp), data = datacomp, lambda =
# 'ML') saveRDS(modpiTemp, paste0(folder_path,'pgls/extra/pglspiTemp.rds'))
modpiTemp <- readRDS(paste0(folder_path, "pgls/extra/pglspiTemp.rds"))
summary(modpiTemp)##
## Call:
## pgls(formula = log(EstPi) ~ log(mean_temp), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35028 -0.06019 0.00117 0.06128 0.29070
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.555
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.392, 0.687)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.138505 0.767286 -8.0003 3.553e-15 ***
## log(mean_temp) 0.375371 0.090557 4.1452 3.683e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09466 on 1002 degrees of freedom
## Multiple R-squared: 0.01686, Adjusted R-squared: 0.01588
## F-statistic: 17.18 on 1 and 1002 DF, p-value: 3.683e-05
plot(modpiTemp)summary(lm(log(tipRate) ~ log(mean_temp), data = datacomp$data))##
## Call:
## lm(formula = log(tipRate) ~ log(mean_temp), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.86817 -0.27258 -0.00459 0.30248 1.57693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.31405 0.23323 -1.346 0.178
## log(mean_temp) -0.20380 0.04046 -5.038 5.59e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4972 on 1002 degrees of freedom
## Multiple R-squared: 0.0247, Adjusted R-squared: 0.02373
## F-statistic: 25.38 on 1 and 1002 DF, p-value: 5.587e-07
# modspRateTemp <- pgls(log(tipRate) ~ log(mean_temp), data = datacomp, lambda
# = 'ML') saveRDS(modspRateTemp,
# paste0(folder_path,'pgls/extra/pglsspRateTemp.rds'))
modspRateTemp <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateTemp.rds"))
summary(modspRateTemp)##
## Call:
## pgls(formula = log(tipRate) ~ log(mean_temp), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.266146 -0.043390 -0.002865 0.040920 0.240823
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.993
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.990, 0.996)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.387185 0.505741 -4.7202 2.691e-06 ***
## log(mean_temp) -0.023511 0.017799 -1.3209 0.1868
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06792 on 1002 degrees of freedom
## Multiple R-squared: 0.001738, Adjusted R-squared: 0.000742
## F-statistic: 1.745 on 1 and 1002 DF, p-value: 0.1868
plot(modspRateTemp)summary(lm(log(EstPi) ~ log(tipRate) + log(mean_temp), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(mean_temp), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0700 -0.5911 0.1573 0.7200 2.0265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.27587 0.45410 -13.821 < 2e-16 ***
## log(tipRate) -0.35696 0.06145 -5.809 8.45e-09 ***
## log(mean_temp) 0.30714 0.07968 3.854 0.000123 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9671 on 1001 degrees of freedom
## Multiple R-squared: 0.05392, Adjusted R-squared: 0.05203
## F-statistic: 28.52 on 2 and 1001 DF, p-value: 8.969e-13
# modpirateTemp <- pgls(log(EstPi) ~ log(tipRate) + log(mean_temp), data =
# datacomp, lambda = 'ML') saveRDS(modpirateTemp,
# paste0(folder_path,'pgls/extra/pglspirateTemp.rds'))
modpirateTemp <- readRDS(paste0(folder_path, "pgls/extra/pglspirateTemp.rds"))
summary(modpirateTemp)##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(mean_temp), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26611 -0.05683 0.00251 0.05509 0.31879
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.475
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.300, 0.631)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.709641 0.722235 -9.2901 < 2.2e-16 ***
## log(tipRate) -0.349087 0.079102 -4.4131 1.13e-05 ***
## log(mean_temp) 0.343797 0.089642 3.8352 0.0001333 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08792 on 1001 degrees of freedom
## Multiple R-squared: 0.03644, Adjusted R-squared: 0.03452
## F-statistic: 18.93 on 2 and 1001 DF, p-value: 8.534e-09
plot(modpirateTemp)par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1145 -0.6182 0.1446 0.7472 1.9798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.96951 0.05139 -77.245 <2e-16 ***
## log(litter_or_clutch_size_n) -0.01034 0.04621 -0.224 0.823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9937 on 1002 degrees of freedom
## Multiple R-squared: 4.996e-05, Adjusted R-squared: -0.000948
## F-statistic: 0.05007 on 1 and 1002 DF, p-value: 0.823
# modpiLitter <- pgls(log(EstPi) ~ log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modpiLitter,
# paste0(folder_path,'pgls/extra/pglspiLitter.rds'))
modpiLitter <- readRDS(paste0(folder_path, "pgls/extra/pglspiLitter.rds"))
summary(modpiLitter)##
## Call:
## pgls(formula = log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30632 -0.06846 -0.00638 0.05812 0.32566
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.583
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.431, 0.705)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.752108 0.590230 -6.3570 3.119e-10 ***
## log(litter_or_clutch_size_n) -0.212593 0.090264 -2.3552 0.0187 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09766 on 1002 degrees of freedom
## Multiple R-squared: 0.005506, Adjusted R-squared: 0.004513
## F-statistic: 5.547 on 1 and 1002 DF, p-value: 0.0187
plot(modpiLitter)summary(lm(log(tipRate) ~ log(litter_or_clutch_size_n), data = datacomp$data))##
## Call:
## lm(formula = log(tipRate) ~ log(litter_or_clutch_size_n), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.87249 -0.28288 -0.01363 0.30216 1.58562
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.47395 0.02603 -56.628 <2e-16 ***
## log(litter_or_clutch_size_n) -0.01407 0.02341 -0.601 0.548
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5033 on 1002 degrees of freedom
## Multiple R-squared: 0.0003604, Adjusted R-squared: -0.0006372
## F-statistic: 0.3613 on 1 and 1002 DF, p-value: 0.5479
# modspRateLitter <- pgls(log(tipRate) ~ log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modspRateLitter, paste0(folder_path,
# 'pgls/extra/pglsspRateLitter.rds'))
modspRateLitter <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateLitter.rds"))
summary(modspRateLitter)##
## Call:
## pgls(formula = log(tipRate) ~ log(litter_or_clutch_size_n), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.274798 -0.041920 -0.001218 0.041449 0.256386
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.993
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.991, 0.996)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.564613 0.495743 -5.1733 2.778e-07 ***
## log(litter_or_clutch_size_n) 0.046781 0.027425 1.7058 0.08836 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06796 on 1002 degrees of freedom
## Multiple R-squared: 0.002895, Adjusted R-squared: 0.0019
## F-statistic: 2.91 on 1 and 1002 DF, p-value: 0.08836
plot(modspRateLitter)summary(lm(log(EstPi) ~ log(tipRate) + log(litter_or_clutch_size_n), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(litter_or_clutch_size_n),
## data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0685 -0.5923 0.1510 0.7406 1.9176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.55111 0.10325 -44.080 < 2e-16 ***
## log(tipRate) -0.39459 0.06114 -6.454 1.7e-10 ***
## log(litter_or_clutch_size_n) -0.01589 0.04531 -0.351 0.726
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9742 on 1001 degrees of freedom
## Multiple R-squared: 0.03999, Adjusted R-squared: 0.03807
## F-statistic: 20.85 on 2 and 1001 DF, p-value: 1.344e-09
# modpirateLitter <- pgls(log(EstPi) ~ log(tipRate) +
# log(litter_or_clutch_size_n), data = datacomp, lambda = 'ML')
# saveRDS(modpirateLitter,
# paste0(folder_path,'pgls/extra/pglspirateLitter.rds'))
modpirateLitter <- readRDS(paste0(folder_path, "pgls/extra/pglspirateLitter.rds"))
summary(modpirateLitter)##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(litter_or_clutch_size_n),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.307010 -0.060652 -0.004758 0.057641 0.253436
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.500
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.329, 0.647)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.581298 0.543776 -8.4250 < 2.2e-16 ***
## log(tipRate) -0.369008 0.080211 -4.6005 4.755e-06 ***
## log(litter_or_clutch_size_n) -0.184746 0.086085 -2.1461 0.03211 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09005 on 1001 degrees of freedom
## Multiple R-squared: 0.02607, Adjusted R-squared: 0.02413
## F-statistic: 13.4 on 2 and 1001 DF, p-value: 1.808e-06
plot(modpirateLitter)Check results are consistent if excluding species with only size litter of 1
par(mfrow = c(2, 2))
# DataSubsetMCC1 <- gendivSpRateTrait %>% filter(set %in% 'treeMCC',
# litter_or_clutch_size_n > 1) %>% select(species, clades, tipRate, EstPi,
# BodyMassKg_notInputed, geoArea_km2, mean_temp, litter_or_clutch_size_n,
# GenerationLength_d) %>% drop_na() t1 <- TreeSet[['treeMCC']] TreeSubset1 <-
# drop.tip(treeMCC, as.character(t1$tip.label[which(!t1$tip.label %in%
# DataSubsetMCC1$species)])) datacomp1 <- comparative.data(data =
# DataSubsetMCC1, phy = TreeSubset1, names.col = 'species', vcv = TRUE, na.omit
# = TRUE, warn.dropped = TRUE) summary(lm(log(EstPi) ~
# log(litter_or_clutch_size_n), data = datacomp1$data)) modpiLitter1 <-
# pgls(log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp1, lambda =
# 'ML') saveRDS(modpiLitter1,
# paste0(folder_path,'pgls/extra/pglspiLitter1.rds'))
modpiLitter1 <- readRDS(paste0(folder_path, "pgls/extra/pglspiLitter1.rds"))
summary(modpiLitter1)##
## Call:
## pgls(formula = log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp1,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25613 -0.05330 0.00140 0.05993 0.36957
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.523
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.349, 0.673)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.624023 0.543999 -6.6618 5.136e-11 ***
## log(litter_or_clutch_size_n) -0.238154 0.097245 -2.4490 0.01455 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09275 on 773 degrees of freedom
## Multiple R-squared: 0.007699, Adjusted R-squared: 0.006415
## F-statistic: 5.998 on 1 and 773 DF, p-value: 0.01455
plot(modpiLitter1)par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(GenerationLength_d), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(GenerationLength_d), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0668 -0.6219 0.1339 0.7352 1.9834
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.48262 0.28743 -8.637 < 2e-16 ***
## log(GenerationLength_d) -0.21058 0.04022 -5.235 2.01e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9804 on 1002 degrees of freedom
## Multiple R-squared: 0.02662, Adjusted R-squared: 0.02565
## F-statistic: 27.41 on 1 and 1002 DF, p-value: 2.008e-07
# modpigenLengh <- pgls(log(EstPi) ~ log(GenerationLength_d), data = datacomp,
# lambda = 'ML') saveRDS(modpigenLengh,
# paste0(folder_path,'pgls/extra/pglspigenLengh.rds'))
modpigenLengh <- readRDS(paste0(folder_path, "pgls/extra/pglspigenLengh.rds"))
summary(modpigenLengh)##
## Call:
## pgls(formula = log(EstPi) ~ log(GenerationLength_d), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32590 -0.06920 -0.00856 0.05637 0.33915
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.568
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.412, 0.695)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.325281 0.878666 -2.6464 0.008263 **
## log(GenerationLength_d) -0.221501 0.091324 -2.4254 0.015465 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09633 on 1002 degrees of freedom
## Multiple R-squared: 0.005837, Adjusted R-squared: 0.004845
## F-statistic: 5.883 on 1 and 1002 DF, p-value: 0.01547
plot(modpigenLengh)summary(lm(log(tipRate) ~ log(GenerationLength_d), data = datacomp$data))##
## Call:
## lm(formula = log(tipRate) ~ log(GenerationLength_d), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.91217 -0.27645 -0.01196 0.29220 1.61260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.83549 0.14717 -12.472 <2e-16 ***
## log(GenerationLength_d) 0.04915 0.02060 2.386 0.0172 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.502 on 1002 degrees of freedom
## Multiple R-squared: 0.005651, Adjusted R-squared: 0.004659
## F-statistic: 5.694 on 1 and 1002 DF, p-value: 0.0172
# modSpRategenLengh <- pgls(log(tipRate) ~ log(GenerationLength_d), data =
# datacomp, lambda = 'ML') saveRDS(modSpRategenLengh, paste0(folder_path,
# 'pgls/extra/pglsSpRategenLengh.rds'))
modSpRategenLengh <- readRDS(paste0(folder_path, "pgls/extra/pglsSpRategenLengh.rds"))
summary(modSpRategenLengh)##
## Call:
## pgls(formula = log(tipRate) ~ log(GenerationLength_d), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.269637 -0.045360 -0.003921 0.036390 0.245839
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.993
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.990, 0.996)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4769954 0.5378322 -4.6055 4.644e-06 ***
## log(GenerationLength_d) -0.0064197 0.0282129 -0.2275 0.82
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06797 on 1002 degrees of freedom
## Multiple R-squared: 5.167e-05, Adjusted R-squared: -0.0009463
## F-statistic: 0.05178 on 1 and 1002 DF, p-value: 0.82
plot(modSpRategenLengh)summary(lm(log(EstPi) ~ log(tipRate) + log(GenerationLength_d), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(GenerationLength_d),
## data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0216 -0.6083 0.1065 0.7012 2.0922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.16556 0.30343 -10.433 < 2e-16 ***
## log(tipRate) -0.37207 0.06060 -6.140 1.19e-09 ***
## log(GenerationLength_d) -0.19230 0.03962 -4.854 1.41e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.963 on 1001 degrees of freedom
## Multiple R-squared: 0.06195, Adjusted R-squared: 0.06008
## F-statistic: 33.05 on 2 and 1001 DF, p-value: 1.256e-14
# modpiSpRategenLengh <- pgls(log(EstPi) ~ log(tipRate) +
# log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modpiSpRategenLengh, paste0(folder_path,
# 'pgls/extra/pglspiSpRategenLengh.rds'))
modpiSpRategenLengh <- readRDS(paste0(folder_path, "pgls/extra/pglspiSpRategenLengh.rds"))
summary(modpiSpRategenLengh)##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(GenerationLength_d),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.262485 -0.053856 0.000991 0.057396 0.304842
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.475
## lower bound : 0.000, p = 1.2212e-15
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.302, 0.628)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.105725 0.808492 -3.8414 0.000130 ***
## log(tipRate) -0.383618 0.079024 -4.8545 1.4e-06 ***
## log(GenerationLength_d) -0.229234 0.085955 -2.6669 0.007779 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08823 on 1001 degrees of freedom
## Multiple R-squared: 0.02919, Adjusted R-squared: 0.02725
## F-statistic: 15.05 on 2 and 1001 DF, p-value: 3.634e-07
plot(modpiSpRategenLengh)##### maybe should use a different variable for longevity
dt <- read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/manuscript_projects_info/mammals_genDiv_SpRate/manuscript_scripts_data/traits/input/GenerationLenghtMammals_Pacifici2013.txt",
header = T, sep = "\t", stringsAsFactors = F) %>%
mutate(species = str_replace(Scientific_name, " ", "_")) %>%
mutate_at(vars(ends_with("_d")), as.numeric) %>%
right_join(select(gen.div, species, EstPi)) %>%
drop_na(Genus)
a <- boxplot(log(na.omit(dt$Rspan_d)))
out <- dt[log(na.omit(dt$Rspan_d)) %in% a$out, "species"]
DataSubset <- dt %>%
select(species, EstPi, Rspan_d) %>%
filter(!duplicated(species), !species %in% out) %>%
drop_na()
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubset$species)]))
datacompGlobalTraits <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
mod <- pgls(log(EstPi) ~ log(Rspan_d), data = datacompGlobalTraits, lambda = "ML")
summary(mod)
##### Rspan_d negative sign - species reproductive life span - longevity is
##### inversily related with genetic diversity
a <- boxplot(log(na.omit(dt$AFR_d)))
out <- dt[log(na.omit(dt$AFR_d)) %in% a$out, "species"]
DataSubset <- dt %>%
select(species, EstPi, AFR_d) %>%
filter(!duplicated(species), !species %in% out) %>%
drop_na()
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubset$species)]))
datacompGlobalTraits <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
mod <- pgls(log(EstPi) ~ log(AFR_d), data = datacompGlobalTraits, lambda = "ML")
summary(mod)
##### AFR_d (age at first reproduction) is negative sign
#############################
a <- boxplot(log(na.omit(dt$Calculated_GL_d)))
out <- dt[log(na.omit(dt$Calculated_GL_d)) %in% a$out, "species"]
DataSubset <- dt %>%
select(species, EstPi, Calculated_GL_d) %>%
filter(!duplicated(species), !species %in% out) %>%
drop_na()
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
DataSubset$species)]))
datacompGlobalTraits <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
mod <- pgls(log(EstPi) ~ log(Calculated_GL_d), data = datacompGlobalTraits, lambda = "ML")
summary(mod)
##### Calculated_GL_d (generation length) is negative sign too, this is the
##### average age of parents of the current cohort, reflecting the turnover
##### rate of breeding individuals in a population. This measure is estimated
##### from reproductive life span and age at first reproduction for most
##### species and input with body size for other species without these
##### measures...### Body mass considered as a control can't compare models fitted with branch
### length transformations
modpiBM2 <- pgls(log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp)
modpiSpRateBM <- pgls(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed), data = datacomp)
modpiSpRateTraits2 <- pgls(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed) +
log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + log(GenerationLength_d),
data = datacomp)
modpiSpRateTraitsnoBM <- pgls(log(EstPi) ~ log(tipRate) + log(geoArea_km2) + log(mean_temp) +
log(litter_or_clutch_size_n) + log(GenerationLength_d), data = datacomp)
anova(modpiBM2, modpiSpRateBM, modpiSpRateTraits2, modpiSpRateTraitsnoBM, test = "F")
modpiSpRate2 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp)
anova(modpiSpRateBM, modpiSpRate2, test = "F")
modpiSpRateLong <- pgls(log(EstPi) ~ log(tipRate) + log(GenerationLength_d), data = datacomp)
modpiSpRateLongBM <- pgls(log(EstPi) ~ log(tipRate) + log(GenerationLength_d) + log(BodyMassKg_notInputed),
data = datacomp)
anova(modpiSpRate2, modpiSpRateLong, modpiSpRateLongBM, test = "F")Comparison between mutation rate and Ne relationship with the five tested traits:
Including the linear regression with values from MCC and mean rates
mutTraits <- sigClades2 %>%
filter(mean_temp > 10) %>% ## one data point below 10 that makes the figure harder to understand
pivot_longer(cols = c('BodyMassKg_notInputed','geoArea_km2','mean_temp','litter_or_clutch_size_n','GenerationLength_d')) %>%
ggplot( aes(x = value, y = MutRate_mean)) +
geom_point() +
geom_smooth(method = "lm") +
geom_smooth(aes(x = value,
y = MutRate_MCC), method = "lm", color = "red") +
facet_wrap(~fct_inorder(name), scales = 'free', ncol = 1) +
scale_y_log10() + scale_x_log10() +
theme_bw() + labs(caption = "Regression Line using MCC rates in red")
NeTraits <- sigClades2 %>%
filter(mean_temp > 10) %>%
pivot_longer(cols = c('BodyMassKg_notInputed','geoArea_km2','mean_temp','litter_or_clutch_size_n','GenerationLength_d')) %>%
ggplot( aes(x = value, y = Ne_mean)) +
geom_point() +
geom_smooth(method = "lm") +
geom_smooth(aes(x = value,
y = Ne_MCC), method = "lm", color = "red") +
facet_wrap(~fct_inorder(name), scales = 'free', ncol = 1) +
scale_y_log10() + scale_x_log10() +
theme_bw() + labs(caption = "Regression Line using MCC rates in red")
ggarrange(mutTraits, NeTraits, ncol = 2)MCCNeMutKg <- sigClades2 %>%
select(species, Ne_MCC, MutRate_MCC, BodyMassKg_notInputed) %>%
drop_na()
pl1 <- ggplot(MCCNeMutKg, aes(y = Ne_MCC, x = BodyMassKg_notInputed)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutKg, aes(y = MutRate_MCC, x = BodyMassKg_notInputed)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl1 | pl2# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutKg$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutKg), phy = TreeSubset, names.col = 'species', vcv = TRUE,
# na.omit = TRUE, warn.dropped = TRUE) modMCCNeKg <- pgls(log(Ne_MCC) ~
# log(BodyMassKg_notInputed), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeKg, paste0(folder_path,'pgls/extra/modMCCNeKg.rds'))
# modMCCMutKg <- pgls(log(MutRate_MCC) ~ log(BodyMassKg_notInputed), data =
# datacomp, lambda = 'ML') saveRDS(modMCCMutKg,
# paste0(folder_path,'pgls/extra/modMCCMutKg.rds'))
modMCCNeKg <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeKg.rds"))
summary(modMCCNeKg)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(BodyMassKg_notInputed), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54427 -0.08479 -0.00518 0.07606 0.40527
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.546
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.422, 0.658)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.527148 0.653869 17.6291 < 2.2e-16 ***
## log(BodyMassKg_notInputed) -0.270275 0.027073 -9.9832 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1253 on 1356 degrees of freedom
## Multiple R-squared: 0.06847, Adjusted R-squared: 0.06778
## F-statistic: 99.67 on 1 and 1356 DF, p-value: < 2.2e-16
modMCCMutKg <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutKg.rds"))
summary(modMCCMutKg)##
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(BodyMassKg_notInputed),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37265 -0.04992 0.00129 0.04837 0.37575
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.630
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.537, 0.711)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.510063 0.460287 -33.6965 < 2.2e-16 ***
## log(BodyMassKg_notInputed) 0.127002 0.017389 7.3034 4.767e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08233 on 1356 degrees of freedom
## Multiple R-squared: 0.03785, Adjusted R-squared: 0.03714
## F-statistic: 53.34 on 1 and 1356 DF, p-value: 4.767e-13
MCCNeMutArea <- sigClades2 %>%
select(species, Ne_MCC, MutRate_MCC, geoArea_km2) %>%
drop_na()
pl1 <- ggplot(MCCNeMutArea, aes(y = Ne_MCC, x = geoArea_km2)) + geom_point() + geom_smooth(method = "lm") +
scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutArea, aes(y = MutRate_MCC, x = geoArea_km2)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl1 | pl2# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutArea$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutArea), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeArea <- pgls(log(Ne_MCC) ~
# log(geoArea_km2), data = datacomp, lambda = 'ML') saveRDS(modMCCNeArea,
# paste0(folder_path,'pgls/extra/modMCCNeArea.rds')) modMCCMutArea <-
# pgls(log(MutRate_MCC) ~ log(geoArea_km2), data = datacomp, lambda = 'ML')
# saveRDS(modMCCMutArea, paste0(folder_path,'pgls/extra/modMCCMutArea.rds'))
modMCCNeArea <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeArea.rds"))
summary(modMCCNeArea)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(geoArea_km2), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48992 -0.10490 -0.00871 0.09461 0.49095
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.721
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.637, 0.789)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.143670 0.922281 10.9985 < 2.2e-16 ***
## log(geoArea_km2) 0.126434 0.014192 8.9089 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1514 on 1353 degrees of freedom
## Multiple R-squared: 0.05541, Adjusted R-squared: 0.05471
## F-statistic: 79.37 on 1 and 1353 DF, p-value: < 2.2e-16
modMCCMutArea <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutArea.rds"))
summary(modMCCMutArea)##
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(geoArea_km2), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41689 -0.05621 -0.00162 0.05244 0.45942
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.700
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.625, 0.763)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.6127697 0.5417953 -28.8167 <2e-16 ***
## log(geoArea_km2) -0.0017346 0.0086849 -0.1997 0.8417
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09008 on 1353 degrees of freedom
## Multiple R-squared: 2.948e-05, Adjusted R-squared: -0.0007096
## F-statistic: 0.03989 on 1 and 1353 DF, p-value: 0.8417
Ne seems to be positively correlated with geographic range size which can be a proxy for population size, while mutation rate is not, so matching popgen predictions?
MCCNeMutTemp <- sigClades2 %>%
select(species, Ne_MCC, MutRate_MCC, mean_temp) %>%
drop_na()
pl1 <- ggplot(MCCNeMutTemp, aes(y = Ne_MCC, x = mean_temp)) + geom_point() + geom_smooth(method = "lm") +
scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutTemp, aes(y = MutRate_MCC, x = mean_temp)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl1 | pl2# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutTemp$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutTemp), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeTemp <- pgls(log(Ne_MCC) ~
# log(mean_temp), data = datacomp, lambda = 'ML') saveRDS(modMCCNeTemp,
# paste0(folder_path,'pgls/extra/modMCCNeTemp.rds')) modMCCMutTemp <-
# pgls(log(MutRate_MCC) ~ log(mean_temp), data = datacomp, lambda = 'ML')
# saveRDS(modMCCMutTemp, paste0(folder_path,'pgls/extra/modMCCMutTemp.rds'))
modMCCNeTemp <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeTemp.rds"))
summary(modMCCNeTemp)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(mean_temp), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.47142 -0.08717 -0.00124 0.09293 0.45335
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.663
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.544, 0.759)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.77757 1.05315 10.2337 < 2e-16 ***
## log(mean_temp) 0.20290 0.11409 1.7784 0.07564 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1423 on 1032 degrees of freedom
## Multiple R-squared: 0.003055, Adjusted R-squared: 0.002089
## F-statistic: 3.163 on 1 and 1032 DF, p-value: 0.07564
modMCCMutTemp <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutTemp.rds"))
summary(modMCCMutTemp)##
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(mean_temp), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40711 -0.04760 0.00364 0.05316 0.39243
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.661
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.567, 0.742)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -16.301163 0.646457 -25.2162 <2e-16 ***
## log(mean_temp) 0.100722 0.070161 1.4356 0.1514
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08738 on 1032 degrees of freedom
## Multiple R-squared: 0.001993, Adjusted R-squared: 0.001026
## F-statistic: 2.061 on 1 and 1032 DF, p-value: 0.1514
MCCNeMutLitter <- sigClades2 %>%
select(species, Ne_MCC, MutRate_MCC, litter_or_clutch_size_n) %>%
drop_na()
pl1 <- ggplot(MCCNeMutLitter, aes(y = Ne_MCC, x = litter_or_clutch_size_n)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutLitter, aes(y = MutRate_MCC, x = litter_or_clutch_size_n)) +
geom_point() + geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl1 | pl2# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutLitter$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutLitter), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeLitter <- pgls(log(Ne_MCC)
# ~ log(litter_or_clutch_size_n), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeLitter, paste0(folder_path,'pgls/extra/modMCCNeLitter.rds'))
# modMCCMutLitter <- pgls(log(MutRate_MCC) ~ log(litter_or_clutch_size_n), data
# = datacomp, lambda = 'ML') saveRDS(modMCCMutLitter,
# paste0(folder_path,'pgls/extra/modMCCMutLitter.rds'))
modMCCNeLitter <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeLitter.rds"))
summary(modMCCNeLitter)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(litter_or_clutch_size_n), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46693 -0.08562 0.00064 0.08423 0.44237
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.623
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.507, 0.723)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.64196 0.78108 14.9050 < 2e-16 ***
## log(litter_or_clutch_size_n) 0.20696 0.11426 1.8113 0.07038 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1389 on 1091 degrees of freedom
## Multiple R-squared: 0.002998, Adjusted R-squared: 0.002084
## F-statistic: 3.281 on 1 and 1091 DF, p-value: 0.07038
modMCCMutLitter <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutLitter.rds"))
summary(modMCCMutLitter)##
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(litter_or_clutch_size_n),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29063 -0.05220 0.00013 0.04800 0.38418
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.581
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.476, 0.676)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.262477 0.439782 -34.7046 < 2.2e-16 ***
## log(litter_or_clutch_size_n) -0.383732 0.067873 -5.6536 2.005e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08082 on 1091 degrees of freedom
## Multiple R-squared: 0.02846, Adjusted R-squared: 0.02757
## F-statistic: 31.96 on 1 and 1091 DF, p-value: 2.005e-08
MCCNeMutGLength <- sigClades2 %>%
select(species, Ne_MCC, MutRate_MCC, GenerationLength_d) %>%
drop_na()
pl1 <- ggplot(MCCNeMutGLength, aes(y = Ne_MCC, x = GenerationLength_d)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutGLength, aes(y = MutRate_MCC, x = GenerationLength_d)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl1 | pl2# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutGLength$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutGLength), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeGLength <-
# pgls(log(Ne_MCC) ~ log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeGLength,
# paste0(folder_path,'pgls/extra/modMCCNeGLength.rds')) modMCCMutGLength <-
# pgls(log(MutRate_MCC) ~ log(GenerationLength_d), data = datacomp, lambda =
# 'ML') saveRDS(modMCCMutGLength,
# paste0(folder_path,'pgls/extra/modMCCMutGLength.rds'))
modMCCNeGLength <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeGLength.rds"))
summary(modMCCNeGLength)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(GenerationLength_d), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33706 -0.07092 0.00339 0.07605 0.43615
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.353
## lower bound : 0.000, p = 6.6613e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.201, 0.508)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.753103 0.795793 26.078 < 2.2e-16 ***
## log(GenerationLength_d) -1.261920 0.092566 -13.633 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1067 on 1376 degrees of freedom
## Multiple R-squared: 0.119, Adjusted R-squared: 0.1184
## F-statistic: 185.9 on 1 and 1376 DF, p-value: < 2.2e-16
modMCCMutGLength <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutGLength.rds"))
summary(modMCCMutGLength)##
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(GenerationLength_d), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.242143 -0.033702 0.001037 0.034028 0.238696
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.307
## lower bound : 0.000, p = 2.7756e-14
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.178, 0.447)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.531352 0.433682 -51.954 < 2.2e-16 ***
## log(GenerationLength_d) 0.969306 0.051373 18.868 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05971 on 1376 degrees of freedom
## Multiple R-squared: 0.2055, Adjusted R-squared: 0.205
## F-statistic: 356 on 1 and 1376 DF, p-value: < 2.2e-16
MCCNeSpRateTraits <- sigClades2 %>%
select(species, Ne_MCC, SpRate_MCC, BodyMassKg_notInputed, geoArea_km2, mean_temp,
litter_or_clutch_size_n, GenerationLength_d) %>%
drop_na()
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
MCCNeSpRateTraits$species)]))
datacomp <- comparative.data(data = data.frame(MCCNeSpRateTraits), phy = TreeSubset,
names.col = "species", vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
modMCCNeSpRateTraits <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + log(GenerationLength_d),
data = datacomp, lambda = "ML")
saveRDS(modMCCNeSpRateTraits, paste0(folder_path, "pgls/extra/modMCCNeSpRateTraits.rds"))
modMCCNeSpRateTraits <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTraits.rds"))
summary(modMCCNeSpRateTraits)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
## log(GenerationLength_d), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35798 -0.06726 0.00038 0.06484 0.28728
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.341
## lower bound : 0.000, p = 1.4454e-08
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.164, 0.530)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.665790 1.211749 12.9283 < 2.2e-16 ***
## log(SpRate_MCC) -0.154028 0.099381 -1.5499 0.121539
## log(BodyMassKg_notInputed) -0.140818 0.031433 -4.4800 8.471e-06 ***
## log(geoArea_km2) 0.129270 0.018801 6.8758 1.183e-11 ***
## log(mean_temp) 0.238170 0.107015 2.2256 0.026301 *
## log(litter_or_clutch_size_n) -0.334276 0.110605 -3.0222 0.002583 **
## log(GenerationLength_d) -0.999144 0.125026 -7.9915 4.219e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09969 on 862 degrees of freedom
## Multiple R-squared: 0.1956, Adjusted R-squared: 0.19
## F-statistic: 34.94 on 6 and 862 DF, p-value: < 2.2e-16
MCCMutRateSpRateTraits <- sigClades2 %>%
select(species, MutRate_MCC, SpRate_MCC, Ne_MCC, BodyMassKg_notInputed, geoArea_km2,
mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
drop_na()
# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCMutRateSpRateTraits$species)])) datacomp <- comparative.data(data =
# data.frame(MCCMutRateSpRateTraits), phy = TreeSubset, names.col = 'species',
# vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCMutRateSpRateTraits <-
# pgls(log(MutRate_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
# log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modMCCMutRateSpRateTraits,
# paste0(folder_path,'pgls/extra/modMCCMutRateSpRateTraits.rds'))
modMCCMutRateSpRateTraits <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutRateSpRateTraits.rds"))
summary(modMCCMutRateSpRateTraits)##
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
## log(GenerationLength_d), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.199101 -0.037127 -0.000453 0.035138 0.215148
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.244
## lower bound : 0.000, p = 3.1609e-08
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.120, 0.398)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.4318630 0.7058234 -31.7811 < 2.2e-16 ***
## log(SpRate_MCC) -0.1550365 0.0589336 -2.6307 0.008673 **
## log(BodyMassKg_notInputed) 0.0018340 0.0178578 0.1027 0.918227
## log(geoArea_km2) 0.0034342 0.0115157 0.2982 0.765609
## log(mean_temp) 0.0600173 0.0651450 0.9213 0.357158
## log(litter_or_clutch_size_n) -0.0939521 0.0649365 -1.4468 0.148308
## log(GenerationLength_d) 0.8743938 0.0731407 11.9550 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05819 on 862 degrees of freedom
## Multiple R-squared: 0.2378, Adjusted R-squared: 0.2325
## F-statistic: 44.82 on 6 and 862 DF, p-value: < 2.2e-16
Speciation rate is not correlated with Ne when including traits but it’s the only variable that correlates with mutation rate. Perhaps the analyses with traits would be more meaningful to explain the correlation between genetic diversity and speciation rates when decomposing pi into Ne and mutation rate (theta (pi) = 4Neu for an idealized population and diploid and nuclear data), but I am not sure if we wouldn’t be overinterpreting these results. Why would Ne and spRate be correlated but when adding traits to the model spRate is not significant anymore, indirect effects?
# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCMutRateSpRateTraits$species)])) datacomp <- comparative.data(data =
# data.frame(MCCMutRateSpRateTraits), phy = TreeSubset, names.col = 'species',
# vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeSpRate <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRate, paste0(folder_path, 'pgls/extra/modMCCNeSpRate.rds'))
# modMCCNeSpRatekg <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) +
# log(BodyMassKg_notInputed), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRatekg, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRatekg.rds')) modMCCNeSpRateTemp0kg <- pgls(log(Ne_MCC)
# ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + log(mean_temp), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateTemp0kg, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateTemp0kg.rds')) modMCCNeSpRateArea <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2), data = datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateArea,
# paste0(folder_path,'pgls/extra/modMCCNeSpRateArea.rds'))
# modMCCNeSpRateTemp0Area <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) +
# log(geoArea_km2) + log(mean_temp), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRateTemp0Area,
# paste0(folder_path,'pgls/extra/modMCCNeSpRateTemp0Area.rds'))
# modMCCNeSpRateTemp0 <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(mean_temp),
# data = datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateTemp0,
# paste0(folder_path, 'pgls/extra/modMCCNeSpRateTemp0.rds')) modMCCNeSpRateTemp
# <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRateTemp, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateTemp.rds')) modMCCNeSpRateLitter <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateLitter, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateLitter.rds')) modMCCNeSpRateLitter0 <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateLitter0, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateLitter0.rds')) modMCCNeSpRateGLength <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
# log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRateGLength, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateGLength.rds')) modMCCNeSpRateGLength0 <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(GenerationLength_d), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateGLength0, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateGLength0.rds'))
modMCCNeSpRate <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRate.rds"))
summary(modMCCNeSpRate)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54477 -0.09021 0.00215 0.08878 0.39689
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.608
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.473, 0.724)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.47250 0.76894 14.9198 <2e-16 ***
## log(SpRate_MCC) -0.23642 0.12114 -1.9517 0.0513 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.134 on 867 degrees of freedom
## Multiple R-squared: 0.004374, Adjusted R-squared: 0.003226
## F-statistic: 3.809 on 1 and 867 DF, p-value: 0.0513
modMCCNeSpRatekg <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRatekg.rds"))
summary(modMCCNeSpRatekg)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53942 -0.07676 0.00041 0.07461 0.38445
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.511
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.361, 0.651)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.173545 0.631838 17.6842 < 2e-16 ***
## log(SpRate_MCC) -0.219117 0.111453 -1.9660 0.04962 *
## log(BodyMassKg_notInputed) -0.267031 0.031427 -8.4968 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1184 on 866 degrees of freedom
## Multiple R-squared: 0.08206, Adjusted R-squared: 0.07994
## F-statistic: 38.71 on 2 and 866 DF, p-value: < 2.2e-16
modMCCNeSpRateTemp0kg <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp0kg.rds"))
summary(modMCCNeSpRateTemp0kg)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(mean_temp), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54008 -0.08460 -0.01031 0.06740 0.38758
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.511
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.365, 0.649)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.133349 0.897767 11.2873 < 2e-16 ***
## log(SpRate_MCC) -0.204774 0.111708 -1.8331 0.06713 .
## log(BodyMassKg_notInputed) -0.264978 0.031429 -8.4309 < 2e-16 ***
## log(mean_temp) 0.182945 0.112211 1.6304 0.10339
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1183 on 865 degrees of freedom
## Multiple R-squared: 0.08485, Adjusted R-squared: 0.08168
## F-statistic: 26.73 on 3 and 865 DF, p-value: < 2.2e-16
modMCCNeSpRateArea <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateArea.rds"))
summary(modMCCNeSpRateArea)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(geoArea_km2), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38499 -0.07232 0.00743 0.08131 0.36520
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.543
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.398, 0.676)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.610188 0.695754 13.8126 < 2.2e-16 ***
## log(SpRate_MCC) -0.147557 0.111082 -1.3284 0.1844
## log(BodyMassKg_notInputed) -0.276661 0.031373 -8.8184 < 2.2e-16 ***
## log(geoArea_km2) 0.123359 0.019238 6.4121 2.359e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1189 on 865 degrees of freedom
## Multiple R-squared: 0.122, Adjusted R-squared: 0.119
## F-statistic: 40.08 on 3 and 865 DF, p-value: < 2.2e-16
modMCCNeSpRateTemp0Area <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp0Area.rds"))
summary(modMCCNeSpRateTemp0Area)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(geoArea_km2) +
## log(mean_temp), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39931 -0.08740 -0.00013 0.09494 0.46578
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.637
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.510, 0.745)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.383318 1.068123 7.8486 1.243e-14 ***
## log(SpRate_MCC) -0.145596 0.121099 -1.2023 0.22958
## log(geoArea_km2) 0.122371 0.019983 6.1238 1.385e-09 ***
## log(mean_temp) 0.271434 0.115039 2.3595 0.01852 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.135 on 865 degrees of freedom
## Multiple R-squared: 0.04956, Adjusted R-squared: 0.04626
## F-statistic: 15.03 on 3 and 865 DF, p-value: 1.516e-09
modMCCNeSpRateTemp <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp.rds"))
summary(modMCCNeSpRateTemp)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(geoArea_km2) + log(mean_temp), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38784 -0.07579 -0.00494 0.07775 0.35575
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.543
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.400, 0.674)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.289628 0.944975 8.7723 < 2.2e-16 ***
## log(SpRate_MCC) -0.128794 0.111250 -1.1577 0.24731
## log(BodyMassKg_notInputed) -0.274022 0.031342 -8.7429 < 2.2e-16 ***
## log(geoArea_km2) 0.125758 0.019238 6.5371 1.072e-10 ***
## log(mean_temp) 0.226806 0.110057 2.0608 0.03962 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1186 on 864 degrees of freedom
## Multiple R-squared: 0.1263, Adjusted R-squared: 0.1223
## F-statistic: 31.23 on 4 and 864 DF, p-value: < 2.2e-16
modMCCNeSpRateTemp0 <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp0.rds"))
summary(modMCCNeSpRateTemp0)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(mean_temp),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54672 -0.08820 -0.00425 0.08745 0.38011
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.609
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.477, 0.724)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.18085 1.01891 9.9919 < 2e-16 ***
## log(SpRate_MCC) -0.21903 0.12135 -1.8049 0.07144 .
## log(mean_temp) 0.22651 0.11707 1.9348 0.05334 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1339 on 866 degrees of freedom
## Multiple R-squared: 0.008644, Adjusted R-squared: 0.006355
## F-statistic: 3.776 on 2 and 866 DF, p-value: 0.0233
modMCCNeSpRateLitter <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateLitter.rds"))
summary(modMCCNeSpRateLitter)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45234 -0.08206 -0.00008 0.07695 0.36439
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.565
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.424, 0.690)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.602918 0.981412 8.7659 < 2.2e-16 ***
## log(SpRate_MCC) -0.121159 0.112305 -1.0788 0.28096
## log(BodyMassKg_notInputed) -0.288956 0.032791 -8.8121 < 2.2e-16 ***
## log(geoArea_km2) 0.130584 0.019422 6.7236 3.222e-11 ***
## log(mean_temp) 0.195982 0.111846 1.7522 0.08009 .
## log(litter_or_clutch_size_n) -0.193329 0.121942 -1.5854 0.11324
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1208 on 863 degrees of freedom
## Multiple R-squared: 0.128, Adjusted R-squared: 0.123
## F-statistic: 25.35 on 5 and 863 DF, p-value: < 2.2e-16
modMCCNeSpRateLitter0 <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateLitter0.rds"))
summary(modMCCNeSpRateLitter0)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(litter_or_clutch_size_n),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41472 -0.08482 0.00675 0.08964 0.39498
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.594
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.452, 0.717)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.33891 0.76347 14.8519 < 2e-16 ***
## log(SpRate_MCC) -0.24158 0.12036 -2.0072 0.04504 *
## log(litter_or_clutch_size_n) 0.11880 0.12466 0.9530 0.34085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1322 on 866 degrees of freedom
## Multiple R-squared: 0.005572, Adjusted R-squared: 0.003275
## F-statistic: 2.426 on 2 and 866 DF, p-value: 0.08899
modMCCNeSpRateGLength <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateGLength.rds"))
summary(modMCCNeSpRateGLength)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
## log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
## log(GenerationLength_d), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35798 -0.06726 0.00038 0.06484 0.28728
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.341
## lower bound : 0.000, p = 1.4454e-08
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.164, 0.530)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.665790 1.211749 12.9283 < 2.2e-16 ***
## log(SpRate_MCC) -0.154028 0.099381 -1.5499 0.121539
## log(BodyMassKg_notInputed) -0.140818 0.031433 -4.4800 8.471e-06 ***
## log(geoArea_km2) 0.129270 0.018801 6.8758 1.183e-11 ***
## log(mean_temp) 0.238170 0.107015 2.2256 0.026301 *
## log(litter_or_clutch_size_n) -0.334276 0.110605 -3.0222 0.002583 **
## log(GenerationLength_d) -0.999144 0.125026 -7.9915 4.219e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09969 on 862 degrees of freedom
## Multiple R-squared: 0.1956, Adjusted R-squared: 0.19
## F-statistic: 34.94 on 6 and 862 DF, p-value: < 2.2e-16
modMCCNeSpRateGLength0 <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateGLength0.rds"))
summary(modMCCNeSpRateGLength0)##
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(GenerationLength_d),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35204 -0.06375 0.00524 0.07292 0.28232
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.248
## lower bound : 0.000, p = 1.6402e-05
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.088, 0.455)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.293882 0.793870 24.3036 < 2.2e-16 ***
## log(SpRate_MCC) -0.288733 0.098269 -2.9382 0.003389 **
## log(GenerationLength_d) -1.126660 0.097269 -11.5830 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09844 on 866 degrees of freedom
## Multiple R-squared: 0.1428, Adjusted R-squared: 0.1409
## F-statistic: 72.15 on 2 and 866 DF, p-value: < 2.2e-16
Perhaps due to some interaction between geographic range size and temperature? Still not sure how to interpret this…
Maybe should do proper phylogenetic analyses (PGLS and BMLM for posterior trees) with mutation rate/Ne and traits? Using MCC tree, Body size, generation length, range area and mean temperature seem to be correlated with Ne and not correlated with mutation rate. Higher Ne for species with lower body sizes, larger range sizes, that occur in warmer climates and short longevity.
Range size being correlated with Ne and not with mutation rate gives a bit more confidence to these values of Ne (better would be to correlate with census size). But could had made sense mean temperature to be correlated with mutation rate.
Checking that MCC vs mean values are highly correlated…
MutRate_meanMCC <- filter(sigClades2, !species %in% exclude) %>%
ggplot(aes(x = MutRate_MCC, y = MutRate_mean, color = signif)) + geom_point() +
geom_smooth(method = "lm") + scale_y_log10() + scale_x_log10() + theme_bw()
SpRate_meanMCC <- filter(sigClades2, !species %in% exclude) %>%
ggplot(aes(x = SpRate_MCC, y = SpRate_mean, color = signif)) + geom_point() +
geom_smooth(method = "lm") + scale_y_log10() + scale_x_log10() + theme_bw()
Ne_meanMCC <- filter(sigClades2, !species %in% exclude) %>%
ggplot(aes(x = Ne_MCC, y = Ne_mean, color = signif)) + geom_point() + geom_smooth(method = "lm") +
scale_y_log10() + scale_x_log10() + theme_bw()
ggarrange(MutRate_meanMCC, SpRate_meanMCC, Ne_meanMCC, ncol = 3, common.legend = T)Why values from MCC analysis give different pattern from using mean values across 100 posterior trees?
spmutClade_mean <- ggplot(filter(sigClades2, !species %in% exclude), aes(x = SpRate_mean,
y = MutRate_mean, color = mutclade)) + geom_point(size = 0.7) + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + theme_bw() + scale_colour_manual(values = iwanthue(16))
spmutClade_MCC <- ggplot(filter(sigClades2, !species %in% exclude), aes(x = SpRate_MCC,
y = MutRate_MCC, color = mutclade)) + geom_point(size = 0.7) + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + theme_bw() + scale_colour_manual(values = iwanthue(16))
ggarrange(spmutClade_mean, spmutClade_MCC, ncol = 2, common.legend = T, legend = "bottom")spmut <- gendivSpRateMutRate %>%
filter(set %in% "treeMCC") %>%
select(species, tipRate, mutRate, mutclade, expNsub, time)
ggplot(spmut, aes(y = tipRate, x = mutRate, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = tipRate, x = expNsub, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = tipRate, x = mutRate, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = tipRate, x = expNsub, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = tipRate, x = time, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = expNsub, x = time, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = mutRate, x = time, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))gendivSpRateTrait <- gendivSpRate %>%
left_join(., traitData, by = "species")
mGendivSpRateTrait <- spRate %>%
filter(!set %in% "treeMCC") %>%
group_by(species) %>%
dplyr::summarise(rate_mean = mean(tipRate, na.rm = TRUE), rate_sd = sd(tipRate,
na.rm = TRUE), rate_se = rate_sd/sqrt(n()), rate_lower.ci = CI(tipRate, ci = 0.95)[1],
rate_upper.ci = CI(tipRate, ci = 0.95)[3], clades = unique(clades)) %>%
arrange(clades) %>%
mutate(ord = seq(1, length(n)), species = forcats::fct_reorder(species, ord),
clades = forcats::fct_relevel(case_when(!clades %in% unique(PGLSclade$clade) ~
"Other", TRUE ~ clades), "Other", after = 14)) %>%
inner_join(., select(gen.div, species, EstPi, EstTheta, Nind), by = "species") %>%
inner_join(., traitData, by = "species")
qR = ggplot(mGendivSpRateTrait, aes(y = rate_mean, x = DispDist_notInputed)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10() + theme_bw() +
geom_smooth(data = filter(gendivSpRateTrait, set %in% "treeMCC"), aes(y = tipRate,
x = DispDist_notInputed), method = "lm", color = "red") + labs(caption = "Blue - regression line with mean rates; Red = regression line with MCC tree")
qp = ggplot(mGendivSpRateTrait, aes(y = EstPi, x = DispDist_notInputed)) + geom_point() +
geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10() + theme_bw()
ggarrange(qR, qp, ncol = 1, heights = c(1.1, 1))OLS of speciation rates and genetic diversity predicted by dispersal distance
With mean rates vs MCC rates and logged dispersal vs non-logged
summary(lm(log(rate_mean) ~ log(DispDist_notInputed), data = mGendivSpRateTrait))##
## Call:
## lm(formula = log(rate_mean) ~ log(DispDist_notInputed), data = mGendivSpRateTrait)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.36605 -0.31725 0.08203 0.36261 1.32706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.268350 0.013189 -96.167 < 2e-16 ***
## log(DispDist_notInputed) 0.023905 0.006474 3.693 0.000229 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5266 on 1598 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.00846, Adjusted R-squared: 0.00784
## F-statistic: 13.63 on 1 and 1598 DF, p-value: 0.0002295
summary(lm(log(rate_mean) ~ DispDist_notInputed, data = mGendivSpRateTrait))##
## Call:
## lm(formula = log(rate_mean) ~ DispDist_notInputed, data = mGendivSpRateTrait)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.34137 -0.32305 0.08152 0.37785 1.44098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.266e+00 1.328e-02 -95.36 <2e-16 ***
## DispDist_notInputed 2.050e-05 2.562e-05 0.80 0.424
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5288 on 1598 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.0004005, Adjusted R-squared: -0.0002251
## F-statistic: 0.6402 on 1 and 1598 DF, p-value: 0.4238
summary(lm(log(tipRate) ~ log(DispDist_notInputed), data = filter(gendivSpRateTrait,
set %in% "treeMCC")))##
## Call:
## lm(formula = log(tipRate) ~ log(DispDist_notInputed), data = filter(gendivSpRateTrait,
## set %in% "treeMCC"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89374 -0.29633 -0.00365 0.30936 1.57642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.469047 0.012793 -114.830 <2e-16 ***
## log(DispDist_notInputed) 0.005928 0.006280 0.944 0.345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5108 on 1598 degrees of freedom
## (2464 observations deleted due to missingness)
## Multiple R-squared: 0.0005573, Adjusted R-squared: -6.818e-05
## F-statistic: 0.891 on 1 and 1598 DF, p-value: 0.3454
summary(lm(log(EstPi) ~ log(DispDist_notInputed), data = mGendivSpRateTrait))##
## Call:
## lm(formula = log(EstPi) ~ log(DispDist_notInputed), data = mGendivSpRateTrait)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8897 -0.6364 0.1482 0.7631 2.1105
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.06843 0.02526 -161.071 < 2e-16 ***
## log(DispDist_notInputed) -0.10215 0.01240 -8.239 3.59e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.009 on 1598 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.04074, Adjusted R-squared: 0.04014
## F-statistic: 67.87 on 1 and 1598 DF, p-value: 3.587e-16
summary(lm(log(EstPi) ~ DispDist_notInputed, data = mGendivSpRateTrait))##
## Call:
## lm(formula = log(EstPi) ~ DispDist_notInputed, data = mGendivSpRateTrait)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4732 -0.6824 0.1525 0.7757 2.3263
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.069e+00 2.568e-02 -158.46 < 2e-16 ***
## DispDist_notInputed -2.397e-04 4.953e-05 -4.84 1.42e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.022 on 1598 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.01445, Adjusted R-squared: 0.01383
## F-statistic: 23.43 on 1 and 1598 DF, p-value: 1.424e-06
PGLS of speciation rates and genetic diversity predicted by dispersal distance using MCC rates with logged dispersal
# load(paste0(folder_path,'speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata'))
# DataSubsetMCC <- gendivSpRateTrait %>% filter(treeN %in% 'treeMCC') %>%
# select(species, tipRate, EstPi, DispDist_notInputed) %>% drop_na() treeMCC <-
# TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# DataSubsetMCC$species)])) datacomp <- comparative.data(data = DataSubsetMCC,
# phy = TreeSubset, names.col = 'species', vcv = TRUE, na.omit = TRUE,
# warn.dropped = TRUE) modpiDisp <- pgls(log(EstPi) ~ log(DispDist_notInputed),
# data = datacomp, lambda = 'ML') saveRDS(modpiDisp,
# paste0(folder_path,'pgls/extra/pglspiDisp.rds'))
modpiDisp <- readRDS(paste0(folder_path, "pgls/extra/pglspiDisp.rds"))
summary(modpiDisp)##
## Call:
## pgls(formula = log(EstPi) ~ log(DispDist_notInputed), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35708 -0.06904 -0.00077 0.07306 0.34367
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.614
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.496, 0.711)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.966737 0.637180 -6.2255 6.123e-10 ***
## log(DispDist_notInputed) 0.014738 0.026064 0.5655 0.5718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1044 on 1598 degrees of freedom
## Multiple R-squared: 0.0002, Adjusted R-squared: -0.0004256
## F-statistic: 0.3197 on 1 and 1598 DF, p-value: 0.5718
par(mfrow = c(2, 2))
plot(modpiDisp)# modspRateDisp <- pgls(log(tipRate) ~ log(DispDist_notInputed), data =
# datacomp, lambda = 'ML') saveRDS(modspRateDisp,
# paste0(folder_path,'pgls/extra/pglsspRateDisp.rds'))
modspRateDisp <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateDisp.rds"))
summary(modspRateDisp)##
## Call:
## pgls(formula = log(tipRate) ~ log(DispDist_notInputed), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32894 -0.04146 0.00016 0.04290 0.23523
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.991
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.989, 0.993)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.5579207 0.4665870 -5.4822 4.875e-08 ***
## log(DispDist_notInputed) -0.0064216 0.0059138 -1.0859 0.2777
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0641 on 1598 degrees of freedom
## Multiple R-squared: 0.0007373, Adjusted R-squared: 0.000112
## F-statistic: 1.179 on 1 and 1598 DF, p-value: 0.2777
plot(modspRateDisp)# modpirateDisp <- pgls(log(EstPi) ~ log(tipRate) + log(DispDist_notInputed),
# data = datacomp, lambda = 'ML') saveRDS(modpirateDisp,
# paste0(folder_path,'pgls/extra/pglspirateDisp.rds'))
modpirateDisp <- readRDS(paste0(folder_path, "pgls/extra/pglspirateDisp.rds"))
summary(modpirateDisp)##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(DispDist_notInputed),
## data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.302448 -0.061116 0.003635 0.063845 0.296543
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.504
## lower bound : 0.000, p = < 2.22e-16
## upper bound : 1.000, p = < 2.22e-16
## 95.0% CI : (0.355, 0.632)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.829887 0.549434 -8.7907 < 2.2e-16 ***
## log(tipRate) -0.394741 0.069098 -5.7128 1.323e-08 ***
## log(DispDist_notInputed) -0.008585 0.024732 -0.3471 0.7285
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0935 on 1597 degrees of freedom
## Multiple R-squared: 0.02003, Adjusted R-squared: 0.0188
## F-statistic: 16.32 on 2 and 1597 DF, p-value: 9.649e-08
plot(modpirateDisp)After phylogenetic correction, dispersal is not meaningful neither for genetic diversity nor for speciation rates.
Only 29 tips can be matched and with data for genetic diversity
With the data used by Buffalo 2021
buff <- read_tsv("/Users/acas/Dropbox/Post-docs/Morlon_Lab/Published_Datasets_Code/buffalo2021_paradox_variation/data/combined_data.tsv")
bgendivSpRate <- buff %>%
mutate(species = str_replace(species, " ", "_")) %>%
inner_join(gendivSpRate) %>%
filter(set %in% "treeMCC") %>%
select(species, tipRate, EstPi, diversity) %>%
drop_na()
rownames(bgendivSpRate) <- bgendivSpRate$species
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
bgendivSpRate$species)]))
datacomp <- comparative.data(data = data.frame(bgendivSpRate), phy = TreeSubset,
names.col = "species", vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
summary(lm(log(diversity) ~ log(tipRate), data = datacomp$data))##
## Call:
## lm(formula = log(diversity) ~ log(tipRate), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.01202 -0.36429 -0.01766 0.56813 1.86999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.0183 0.5200 -11.573 5.66e-12 ***
## log(tipRate) 0.1254 0.3534 0.355 0.725
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9998 on 27 degrees of freedom
## Multiple R-squared: 0.004643, Adjusted R-squared: -0.03222
## F-statistic: 0.1259 on 1 and 27 DF, p-value: 0.7254
modBuffDiv <- pgls(log(diversity) ~ log(tipRate), data = datacomp, lambda = "ML")
summary(modBuffDiv)##
## Call:
## pgls(formula = log(diversity) ~ log(tipRate), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30329 -0.10389 -0.02971 0.03150 0.22426
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.972
## lower bound : 0.000, p = 0.0076068
## upper bound : 1.000, p = 0.22535
## 95.0% CI : (0.598, NA)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.2540813 1.1483784 -5.4460 9.211e-06 ***
## log(tipRate) 0.0017352 0.3423443 0.0051 0.996
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1302 on 27 degrees of freedom
## Multiple R-squared: 9.515e-07, Adjusted R-squared: -0.03704
## F-statistic: 2.569e-05 on 1 and 27 DF, p-value: 0.996
With the same species but with the mtDNA dataset
summary(lm(log(EstPi) ~ log(tipRate), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(tipRate), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3955 -0.6474 0.2172 0.7539 1.7333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.5826 0.4541 -7.889 1.76e-08 ***
## log(tipRate) 0.5512 0.3086 1.786 0.0853 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.873 on 27 degrees of freedom
## Multiple R-squared: 0.1057, Adjusted R-squared: 0.07254
## F-statistic: 3.19 on 1 and 27 DF, p-value: 0.08533
modEstPi <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = "ML")
summary(modEstPi)##
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate), data = datacomp, lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20362 -0.09417 0.00968 0.05339 0.24818
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.956
## lower bound : 0.000, p = 0.013477
## upper bound : 1.000, p = 0.30822
## 95.0% CI : (0.413, NA)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.11050 0.98663 -3.1527 0.003939 **
## log(tipRate) 0.55331 0.30476 1.8156 0.080556 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1111 on 27 degrees of freedom
## Multiple R-squared: 0.1088, Adjusted R-squared: 0.0758
## F-statistic: 3.296 on 1 and 27 DF, p-value: 0.08056
summary(lm(log(EstPi) ~ log(diversity), data = datacomp$data))##
## Call:
## lm(formula = log(EstPi) ~ log(diversity), data = datacomp$data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.50010 -0.59841 0.02811 0.40313 1.74595
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.6995 0.9846 -1.726 0.0958 .
## log(diversity) 0.4266 0.1571 2.715 0.0114 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8183 on 27 degrees of freedom
## Multiple R-squared: 0.2144, Adjusted R-squared: 0.1853
## F-statistic: 7.369 on 1 and 27 DF, p-value: 0.01142
bgendivSpRate %>%
ggplot(aes(x = diversity, y = EstPi)) + geom_point() + geom_smooth(method = "lm") +
scale_x_log10() + scale_y_log10() + theme_bw() + labs(x = "nDNA_Buffalo", y = "mtDNA_thisStudy")modBuffDivmtDNA <- pgls(log(EstPi) ~ log(diversity), data = datacomp, lambda = "ML")
summary(modBuffDivmtDNA)##
## Call:
## pgls(formula = log(EstPi) ~ log(diversity), data = datacomp,
## lambda = "ML")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20853 -0.04562 0.03095 0.07641 0.16219
##
## Branch length transformations:
##
## kappa [Fix] : 1.000
## lambda [ ML] : 0.733
## lower bound : 0.000, p = 0.28287
## upper bound : 1.000, p = 0.11808
## 95.0% CI : (NA, NA)
## delta [Fix] : 1.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.29395 1.20116 -1.9098 0.06684 .
## log(diversity) 0.29073 0.16682 1.7428 0.09276 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08583 on 27 degrees of freedom
## Multiple R-squared: 0.1011, Adjusted R-squared: 0.06782
## F-statistic: 3.037 on 1 and 27 DF, p-value: 0.09276
par(mfrow = c(2, 2))
plot(modBuffDivmtDNA)At least these two datasets are kind of correlated, but not sure what to conclude when correcting for the phylogeny gives a non-significant positive correlation.
bgendivSpRate %>%
pivot_longer(cols = EstPi:diversity) %>%
ggplot(aes(x = tipRate, y = value, color = name)) + geom_point() + geom_smooth(method = "lm") +
scale_x_log10() + scale_y_log10() + labs(x = "Tip Speciation rate", y = "Diversity") +
theme_bw() + scale_color_manual(name = "", values = hues::iwanthue(2), labels = c("nDNA_Buffalo",
"mtDNA_thisStudy"))With this highly filtered dataset the correlation is positive for both datasets but when corrected for phylogenetic structure is not significant.
library(ggtree)
setdiv <- bgendivSpRate %>%
select(species) %>%
mutate(inBuff = "yes")
nodedf <- data.frame(node = nodeid(MCCtree, setdiv$species))
ggtree(MCCtree, layout = "circular", size = 0.1) %<+% setdiv + geom_hilight(data = nodedf,
mapping = aes(node = node), color = "darkred", size = 1, show.legend = FALSE) +
geom_tippoint(aes(color = inBuff), na.rm = TRUE, show.legend = FALSE) + scale_colour_manual(values = "darkred",
na.value = NA)The dataset is not very evenly sampled across the Mammals phylogeny.
### get mammals families data
mccParF <- read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/UphamClades_fromNathan_FBD_clades_fraction_fam.txt",
header = T, comment.char = "") %>%
mutate(stemAge = NA, crownAge = NA, class = "birds", rank = "family") %>%
dplyr::rename(n = Nsp_DNAonly_fam) %>%
left_join(read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/families_pars.txt",
header = T), by = "clades")
gen.div <- read.delim(paste0(folder_path, "genetic_diversity/GenDiv_subsample5Ind.txt")) %>%
filter(EstPi > 0, !outPi %in% "out", !outTheta %in% "out")
clades <- read.delim(paste0(folder_path, "speciation_rate/input/MamPhy_5911sp_tipGenFamOrdCladeGenesSampPC.txt")) %>%
drop_na(PC) %>%
mutate(species = word(tiplabel, 1, 2, sep = "_"), clades = sub("^PC\\d+_", "",
PC)) %>%
select(species, clades, ord, fam) %>%
filter(species %in% treeMCC$tip.labels)
spRate <- read.delim("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/families_tipRate.txt") %>%
rename(tipRate = rate, fam = clades) %>%
left_join(., clades, by = c("species", "fam"))
gendivSpRate <- spRate %>%
left_join(., select(gen.div, species, EstPi, subPi_mean, EstTheta, subTheta_mean),
by = "species")
freqClade <- gendivSpRate %>%
select(species, fam, EstPi) %>%
drop_na() %>%
group_by(fam) %>%
tally() %>%
filter(n > 20, fam %in% mccParF$clades)
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata")) #loads TreeSet
tr <- TreeSet[["treeMCC"]]
datacompClades <- list()
for (f in as.character(freqClade$fam)) {
DataSubset <- gendivSpRate %>%
select(species, fam, EstPi, tipRate) %>%
filter(fam %in% f) %>%
drop_na()
TreeSubset <- drop.tip(tr, as.character(tr$tip.label[which(!tr$tip.label %in%
DataSubset$species)]))
datacompClades[[f]] <- comparative.data(data = DataSubset, phy = TreeSubset,
names.col = "species", vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)
}
wd <- paste0(folder_path, "pgls/family/")
dir.create(wd, recursive = T)
pgls <- list()
for (clade in names(datacompClades)) {
tryCatch({
modpglsEstPi <- NULL
modpglsEstPi <- pgls(log(EstPi) ~ log(tipRate), data = datacompClades[[clade]],
lambda = "ML")
pgls[[clade]] <- list(EstPi = modpglsEstPi)
print(clade)
}, error = function(e) {
cat("Error in", conditionMessage(e), "\n")
})
}
saveRDS(pgls, paste0(wd, "family_gendivSpRate_results_treeMCC.rds"))wd <- paste0(folder_path, "pgls/family/")
mccParF <- read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/UphamClades_fromNathan_FBD_clades_fraction_fam.txt",
header = T, comment.char = "") %>%
mutate(stemAge = NA, crownAge = NA, class = "birds", rank = "family") %>%
dplyr::rename(n = Nsp_DNAonly_fam) %>%
left_join(read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/families_pars.txt",
header = T), by = "clades")
pgls <- readRDS(paste0(wd, "family_gendivSpRate_results_treeMCC.rds"))
pglssum <- data.frame()
for (j in names(pgls)) {
if (!is.null(pgls[[j]])) {
pglssum <- bind_rows(pglssum, data.frame(set = str_replace(word(i, -1, sep = "_"),
".rds", ""), analysis = j, df = summary(pgls[[j]]$EstPi)$df[2], term = rownames(summary(pgls[[j]]$EstPi)$coefficients),
summary(pgls[[j]]$EstPi)$coefficients, sigma = summary(pgls[[j]]$EstPi)$sigma,
lambda = pgls[[j]]$EstPi$param.CI$lambda$opt, lambda.CIlow = pgls[[j]]$EstPi$param.CI$lambda$ci.val[1],
lambda.CIup = pgls[[j]]$EstPi$param.CI$lambda$ci.val[2], stringsAsFactors = FALSE))
}
}
saveRDS(pglssum, paste0(wd, "family_gendivSpRate_PGLSresults.rds"))
pglsFam <- mccParF %>%
rename(analysis = clades) %>%
right_join(pglssum, by = "analysis", suffix = c(".clads", ".pgls")) %>%
filter(term %in% "log(tipRate)") %>%
select(analysis, sigma.clads:lambda_0, Estimate, Nsp_complete)
pglsFam %>%
rename(sigma = sigma.clads) %>%
pivot_longer(-c(analysis, Estimate, Nsp_complete)) %>%
ggplot(aes(x = value, y = Estimate)) + geom_point() + geom_smooth(method = "lm") +
facet_wrap(~name, scales = "free") + labs(y = "slope estimate", x = "hyperparameter values")summary(lm(data = pglsFam, Estimate ~ alpha))##
## Call:
## lm(formula = Estimate ~ alpha, data = pglsFam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9147 -0.5086 -0.0767 0.8215 2.7938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.299 4.827 0.269 0.791
## alpha -1.898 5.391 -0.352 0.729
##
## Residual standard error: 1.628 on 16 degrees of freedom
## Multiple R-squared: 0.007687, Adjusted R-squared: -0.05433
## F-statistic: 0.1239 on 1 and 16 DF, p-value: 0.7294
Hyperparamters don’t add any explanatory power to the slope sign or intensity.
pglsFam %>%
ggplot(aes(x = Nsp_complete, y = Estimate)) + geom_point() + geom_smooth(method = "lm") +
labs(y = "Slope estimate", x = "Total number of in phylogeny per clade")summary(lm(data = pglsFam, Estimate ~ Nsp_complete))##
## Call:
## lm(formula = Estimate ~ Nsp_complete, data = pglsFam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9420 -0.5522 -0.0004 0.9505 2.7302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2991693 0.5385603 -0.555 0.586
## Nsp_complete -0.0004369 0.0017309 -0.252 0.804
##
## Residual standard error: 1.631 on 16 degrees of freedom
## Multiple R-squared: 0.003967, Adjusted R-squared: -0.05829
## F-statistic: 0.06372 on 1 and 16 DF, p-value: 0.8039
No relationship with species richness.
Have to use gls function with corPagel (similar to pgls from caper) because it gives an error for the optimizer
library(nlme)
dtset <- gendivSpRate %>%
filter(set %in% "treeMCC") %>%
drop_na(EstPi)
treeMCC <- TreeSet[["treeMCC"]]
TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
dtset$species)]))
fit1 <- gls(log(tipRate) ~ log(EstPi), data = dtset, correlation = corPagel(1, phy = TreeSubset,
form = ~species), method = "ML")
summary(fit1)## Generalized least squares fit by maximum likelihood
## Model: log(tipRate) ~ log(EstPi)
## Data: dtset
## AIC BIC logLik
## -641.2292 -619.0816 324.6146
##
## Correlation Structure: corPagel
## Formula: ~species
## Parameter estimate(s):
## lambda
## 0.990391
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -2.6059169 0.4615167 -5.646420 0.0000
## log(EstPi) -0.0084258 0.0039507 -2.132721 0.0331
##
## Correlation:
## (Intr)
## log(EstPi) 0.034
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -0.9148429 0.9357824 1.2830346 1.6500237 3.0912831
##
## Residual standard error: 0.8604941
## Degrees of freedom: 1876 total; 1874 residual